Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] simd: make OpMasked machine ops only
authorJunyang Shao <shaojunyang@google.com>
Mon, 18 Aug 2025 21:13:00 +0000 (21:13 +0000)
committerJunyang Shao <shaojunyang@google.com>
Tue, 19 Aug 2025 20:46:58 +0000 (13:46 -0700)
Right now the `Op(...).Masked` idiom can be expected to lack many parts, which
would leave the API incomplete. To keep the API surface smaller, we are removing
these ops' frontend types and interfaces for now. Later, peephole rules and a new
pass that checks CPU-feature domination relations will allow these ops to be
selected for the right `Op(...).Masked` idiom.

Change-Id: I77f72a198b3d8b1880dcb911470db5e0089ac1ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/697155
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Bypass: Junyang Shao <shaojunyang@google.com>

src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/_gen/simdgen/godefs.go
src/simd/compare_test.go
src/simd/ops_amd64.go
src/simd/simd_test.go

index d64f36cf74e9c2c112da04dfddce65fd0083759e..cfe0075986f80ebcb9c5b3de6e7acdeda52642c1 100644 (file)
 (AbsInt64x2 ...) => (VPABSQ128 ...)
 (AbsInt64x4 ...) => (VPABSQ256 ...)
 (AbsInt64x8 ...) => (VPABSQ512 ...)
-(AbsMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AbsMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AbsMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AbsMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AbsMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AbsMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AbsMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AbsMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AbsMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AbsMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AbsMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AbsMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AddFloat32x4 ...) => (VADDPS128 ...)
 (AddFloat32x8 ...) => (VADDPS256 ...)
 (AddFloat32x16 ...) => (VADDPS512 ...)
 (AddDotProdPairsSaturatedInt32x4 ...) => (VPDPWSSDS128 ...)
 (AddDotProdPairsSaturatedInt32x8 ...) => (VPDPWSSDS256 ...)
 (AddDotProdPairsSaturatedInt32x16 ...) => (VPDPWSSDS512 ...)
-(AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
 (AddDotProdQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
 (AddDotProdQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
 (AddDotProdQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(AddDotProdQuadrupleMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
 (AddDotProdQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
 (AddDotProdQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
 (AddDotProdQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
-(AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AddMaskedFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AddMaskedFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AddMaskedInt8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddMaskedInt8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddMaskedInt8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddMaskedInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddMaskedInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddMaskedInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AddMaskedInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddMaskedInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddMaskedInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AddMaskedInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AddMaskedInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AddMaskedUint8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddMaskedUint8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddMaskedUint8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddMaskedUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddMaskedUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddMaskedUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AddMaskedUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddMaskedUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddMaskedUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AddMaskedUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AddMaskedUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AddPairsFloat32x4 ...) => (VHADDPS128 ...)
 (AddPairsFloat32x8 ...) => (VHADDPS256 ...)
 (AddPairsFloat64x2 ...) => (VHADDPD128 ...)
 (AddSaturatedUint16x8 ...) => (VPADDUSW128 ...)
 (AddSaturatedUint16x16 ...) => (VPADDUSW256 ...)
 (AddSaturatedUint16x32 ...) => (VPADDUSW512 ...)
-(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x16 x y mask) => (VPADDUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x32 x y mask) => (VPADDUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x64 x y mask) => (VPADDUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x8 x y mask) => (VPADDUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x16 x y mask) => (VPADDUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x32 x y mask) => (VPADDUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (AddSubFloat32x4 ...) => (VADDSUBPS128 ...)
 (AddSubFloat32x8 ...) => (VADDSUBPS256 ...)
 (AddSubFloat64x2 ...) => (VADDSUBPD128 ...)
 (AndUint64x2 ...) => (VPAND128 ...)
 (AndUint64x4 ...) => (VPAND256 ...)
 (AndUint64x8 ...) => (VPANDQ512 ...)
-(AndMaskedInt32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndMaskedInt32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndMaskedInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndMaskedInt64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndMaskedInt64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndMaskedInt64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AndMaskedUint32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndMaskedUint32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndMaskedUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndMaskedUint64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndMaskedUint64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndMaskedUint64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AndNotInt8x16 ...) => (VPANDN128 ...)
 (AndNotInt8x32 ...) => (VPANDN256 ...)
 (AndNotInt8x64 ...) => (VPANDND512 ...)
 (AndNotUint64x2 ...) => (VPANDN128 ...)
 (AndNotUint64x4 ...) => (VPANDN256 ...)
 (AndNotUint64x8 ...) => (VPANDNQ512 ...)
-(AndNotMaskedInt32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndNotMaskedInt32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndNotMaskedInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndNotMaskedInt64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndNotMaskedInt64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndNotMaskedInt64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AndNotMaskedUint32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndNotMaskedUint32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndNotMaskedUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AverageUint8x16 ...) => (VPAVGB128 ...)
 (AverageUint8x32 ...) => (VPAVGB256 ...)
 (AverageUint8x64 ...) => (VPAVGB512 ...)
 (AverageUint16x8 ...) => (VPAVGW128 ...)
 (AverageUint16x16 ...) => (VPAVGW256 ...)
 (AverageUint16x32 ...) => (VPAVGW512 ...)
-(AverageMaskedUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AverageMaskedUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AverageMaskedUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
 (Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
 (Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
 (Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
 (Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
 (Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128MaskedFloat32x4 x mask) => (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast128MaskedFloat64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
 (Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
 (Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
 (Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
 (Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
 (Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast256MaskedFloat32x4 x mask) => (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast256MaskedFloat64x2 x mask) => (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
 (Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
 (Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
 (Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
 (Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
 (Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
-(Broadcast512MaskedFloat32x4 x mask) => (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast512MaskedFloat64x2 x mask) => (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (CeilFloat32x4 x) => (VROUNDPS128 [2] x)
 (CeilFloat32x8 x) => (VROUNDPS256 [2] x)
 (CeilFloat64x2 x) => (VROUNDPD128 [2] x)
 (CeilScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x)
 (CeilScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x)
 (CeilScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x)
-(CeilScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (CeilScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
 (CeilScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
 (CeilScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
 (CeilScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x)
 (CeilScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x)
 (CeilScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x)
-(CeilScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
 (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
 (ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...)
-(ConvertToInt32MaskedFloat32x4 x mask) => (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ConvertToInt32MaskedFloat32x8 x mask) => (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ConvertToInt32MaskedFloat32x16 x mask) => (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
 (ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
 (ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...)
-(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (CopySignInt8x16 ...) => (VPSIGNB128 ...)
 (CopySignInt8x32 ...) => (VPSIGNB256 ...)
 (CopySignInt16x8 ...) => (VPSIGNW128 ...)
 (DivFloat64x2 ...) => (VDIVPD128 ...)
 (DivFloat64x4 ...) => (VDIVPD256 ...)
 (DivFloat64x8 ...) => (VDIVPD512 ...)
-(DivMaskedFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(DivMaskedFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(DivMaskedFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (DotProdPairsInt16x8 ...) => (VPMADDWD128 ...)
 (DotProdPairsInt16x16 ...) => (VPMADDWD256 ...)
 (DotProdPairsInt16x32 ...) => (VPMADDWD512 ...)
-(DotProdPairsMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(DotProdPairsMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(DotProdPairsMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (DotProdPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
 (DotProdPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
 (DotProdPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
-(DotProdPairsSaturatedMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(DotProdPairsSaturatedMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(DotProdPairsSaturatedMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
 (EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
 (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
 (EqualUint64x2 ...) => (VPCMPEQQ128 ...)
 (EqualUint64x4 ...) => (VPCMPEQQ256 ...)
 (EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
-(EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(EqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(EqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(EqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(EqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(EqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(EqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(EqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(EqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(EqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(EqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(EqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(EqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(EqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(EqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(EqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(EqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(EqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(EqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(EqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(EqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(EqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(EqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(EqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(EqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(EqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (ExpandFloat32x4 x mask) => (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ExpandFloat32x8 x mask) => (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ExpandFloat32x16 x mask) => (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (FloorScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x)
 (FloorScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x)
 (FloorScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x)
-(FloorScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (FloorScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x)
 (FloorScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x)
 (FloorScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x)
 (FloorScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x)
 (FloorScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x)
 (FloorScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x)
-(FloorScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (GaloisFieldAffineTransformUint8x16 ...) => (VGF2P8AFFINEQB128 ...)
 (GaloisFieldAffineTransformUint8x32 ...) => (VGF2P8AFFINEQB256 ...)
 (GaloisFieldAffineTransformUint8x64 ...) => (VGF2P8AFFINEQB512 ...)
 (GaloisFieldAffineTransformInverseUint8x16 ...) => (VGF2P8AFFINEINVQB128 ...)
 (GaloisFieldAffineTransformInverseUint8x32 ...) => (VGF2P8AFFINEINVQB256 ...)
 (GaloisFieldAffineTransformInverseUint8x64 ...) => (VGF2P8AFFINEINVQB512 ...)
-(GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
 (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...)
 (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...)
 (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...)
-(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
 (GetElemFloat32x4 ...) => (VPEXTRD128 ...)
 (GetElemFloat64x2 ...) => (VPEXTRQ128 ...)
 (GetElemInt8x16 ...) => (VPEXTRB128 ...)
 (GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
 (GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
 (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
-(GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (IsNanFloat32x4 x y) => (VCMPPS128 [3] x y)
 (IsNanFloat32x8 x y) => (VCMPPS256 [3] x y)
 (IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
 (IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
 (IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
 (IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
-(IsNanMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (LessFloat32x4 x y) => (VCMPPS128 [1] x y)
 (LessFloat32x8 x y) => (VCMPPS256 [1] x y)
 (LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
 (LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
 (LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
 (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
-(LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (MaxFloat32x4 ...) => (VMAXPS128 ...)
 (MaxFloat32x8 ...) => (VMAXPS256 ...)
 (MaxFloat32x16 ...) => (VMAXPS512 ...)
 (MaxUint64x2 ...) => (VPMAXUQ128 ...)
 (MaxUint64x4 ...) => (VPMAXUQ256 ...)
 (MaxUint64x8 ...) => (VPMAXUQ512 ...)
-(MaxMaskedFloat32x4 x y mask) => (VMAXPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MaxMaskedFloat32x8 x y mask) => (VMAXPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MaxMaskedFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MaxMaskedFloat64x2 x y mask) => (VMAXPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MaxMaskedFloat64x4 x y mask) => (VMAXPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MaxMaskedFloat64x8 x y mask) => (VMAXPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MaxMaskedInt8x16 x y mask) => (VPMAXSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MaxMaskedInt8x32 x y mask) => (VPMAXSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MaxMaskedInt8x64 x y mask) => (VPMAXSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MaxMaskedInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MaxMaskedInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MaxMaskedInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MaxMaskedInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MaxMaskedInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MaxMaskedInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MaxMaskedInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MaxMaskedInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MaxMaskedInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MaxMaskedUint8x16 x y mask) => (VPMAXUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MaxMaskedUint8x32 x y mask) => (VPMAXUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MaxMaskedUint8x64 x y mask) => (VPMAXUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MaxMaskedUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MaxMaskedUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MaxMaskedUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MaxMaskedUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MaxMaskedUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MaxMaskedUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MaxMaskedUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MaxMaskedUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MaxMaskedUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MinFloat32x4 ...) => (VMINPS128 ...)
 (MinFloat32x8 ...) => (VMINPS256 ...)
 (MinFloat32x16 ...) => (VMINPS512 ...)
 (MinUint64x2 ...) => (VPMINUQ128 ...)
 (MinUint64x4 ...) => (VPMINUQ256 ...)
 (MinUint64x8 ...) => (VPMINUQ512 ...)
-(MinMaskedFloat32x4 x y mask) => (VMINPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MinMaskedFloat32x8 x y mask) => (VMINPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MinMaskedFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MinMaskedFloat64x2 x y mask) => (VMINPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MinMaskedFloat64x4 x y mask) => (VMINPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MinMaskedFloat64x8 x y mask) => (VMINPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MinMaskedInt8x16 x y mask) => (VPMINSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MinMaskedInt8x32 x y mask) => (VPMINSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MinMaskedInt8x64 x y mask) => (VPMINSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MinMaskedInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MinMaskedInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MinMaskedInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MinMaskedInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MinMaskedInt32x8 x y mask) => (VPMINSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MinMaskedInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MinMaskedInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MinMaskedInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MinMaskedInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MinMaskedUint8x16 x y mask) => (VPMINUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MinMaskedUint8x32 x y mask) => (VPMINUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MinMaskedUint8x64 x y mask) => (VPMINUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MinMaskedUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MinMaskedUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MinMaskedUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MinMaskedUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MinMaskedUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MinMaskedUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MinMaskedUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MinMaskedUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MinMaskedUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulFloat32x4 ...) => (VMULPS128 ...)
 (MulFloat32x8 ...) => (VMULPS256 ...)
 (MulFloat32x16 ...) => (VMULPS512 ...)
 (MulAddFloat64x2 ...) => (VFMADD213PD128 ...)
 (MulAddFloat64x4 ...) => (VFMADD213PD256 ...)
 (MulAddFloat64x8 ...) => (VFMADD213PD512 ...)
-(MulAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...)
 (MulAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...)
 (MulAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...)
 (MulAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...)
 (MulAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...)
 (MulAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...)
-(MulAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...)
 (MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...)
 (MulEvenWidenUint32x4 ...) => (VPMULUDQ128 ...)
 (MulHighUint16x8 ...) => (VPMULHUW128 ...)
 (MulHighUint16x16 ...) => (VPMULHUW256 ...)
 (MulHighUint16x32 ...) => (VPMULHUW512 ...)
-(MulHighMaskedInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulHighMaskedInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulMaskedFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulMaskedFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulMaskedFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MulMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MulMaskedUint16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulMaskedUint16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulMaskedUint16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulMaskedUint32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulMaskedUint32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulMaskedUint32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulMaskedUint64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulMaskedUint64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulMaskedUint64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulSubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...)
 (MulSubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...)
 (MulSubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...)
 (MulSubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
 (MulSubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
 (MulSubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
-(MulSubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
 (NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y)
 (NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y)
 (NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
 (NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
 (NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
 (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
-(NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (OnesCountInt8x16 ...) => (VPOPCNTB128 ...)
 (OnesCountInt8x32 ...) => (VPOPCNTB256 ...)
 (OnesCountInt8x64 ...) => (VPOPCNTB512 ...)
 (OnesCountUint64x2 ...) => (VPOPCNTQ128 ...)
 (OnesCountUint64x4 ...) => (VPOPCNTQ256 ...)
 (OnesCountUint64x8 ...) => (VPOPCNTQ512 ...)
-(OnesCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(OnesCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(OnesCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(OnesCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(OnesCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(OnesCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(OnesCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OnesCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OnesCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OnesCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OnesCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OnesCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(OnesCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(OnesCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(OnesCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(OnesCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(OnesCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(OnesCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(OnesCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OnesCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OnesCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OnesCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OnesCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OnesCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (OrInt8x16 ...) => (VPOR128 ...)
 (OrInt8x32 ...) => (VPOR256 ...)
 (OrInt8x64 ...) => (VPORD512 ...)
 (OrUint64x2 ...) => (VPOR128 ...)
 (OrUint64x4 ...) => (VPOR256 ...)
 (OrUint64x8 ...) => (VPORQ512 ...)
-(OrMaskedInt32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OrMaskedInt32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OrMaskedInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OrMaskedInt64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OrMaskedInt64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OrMaskedInt64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(OrMaskedUint32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OrMaskedUint32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OrMaskedUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (PermuteFloat32x8 ...) => (VPERMPS256 ...)
 (PermuteFloat32x16 ...) => (VPERMPS512 ...)
 (PermuteFloat64x4 ...) => (VPERMPD256 ...)
 (Permute2Uint64x2 ...) => (VPERMI2Q128 ...)
 (Permute2Uint64x4 ...) => (VPERMI2Q256 ...)
 (Permute2Uint64x8 ...) => (VPERMI2Q512 ...)
-(Permute2MaskedFloat32x4 x y z mask) => (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Permute2MaskedFloat32x8 x y z mask) => (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(Permute2MaskedFloat32x16 x y z mask) => (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(Permute2MaskedFloat64x2 x y z mask) => (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Permute2MaskedFloat64x4 x y z mask) => (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(Permute2MaskedFloat64x8 x y z mask) => (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(Permute2MaskedInt8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Permute2MaskedInt8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
-(Permute2MaskedInt8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
-(Permute2MaskedInt16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Permute2MaskedInt16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(Permute2MaskedInt16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(Permute2MaskedInt32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Permute2MaskedInt32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(Permute2MaskedInt32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(Permute2MaskedInt64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Permute2MaskedInt64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(Permute2MaskedInt64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(Permute2MaskedUint8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Permute2MaskedUint8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
-(Permute2MaskedUint8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
-(Permute2MaskedUint16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Permute2MaskedUint16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(Permute2MaskedUint16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(Permute2MaskedUint32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Permute2MaskedUint32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(Permute2MaskedUint32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(Permute2MaskedUint64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Permute2MaskedUint64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(Permute2MaskedUint64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PermuteMaskedFloat32x8 x y mask) => (VPERMPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PermuteMaskedFloat32x16 x y mask) => (VPERMPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PermuteMaskedFloat64x4 x y mask) => (VPERMPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PermuteMaskedFloat64x8 x y mask) => (VPERMPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PermuteMaskedInt8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(PermuteMaskedInt8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(PermuteMaskedInt8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(PermuteMaskedInt16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(PermuteMaskedInt16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(PermuteMaskedInt16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(PermuteMaskedInt32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PermuteMaskedInt32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PermuteMaskedInt64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PermuteMaskedInt64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PermuteMaskedUint8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(PermuteMaskedUint8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(PermuteMaskedUint8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(PermuteMaskedUint16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(PermuteMaskedUint16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(PermuteMaskedUint16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(PermuteMaskedUint32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ReciprocalFloat32x4 ...) => (VRCPPS128 ...)
 (ReciprocalFloat32x8 ...) => (VRCPPS256 ...)
 (ReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
 (ReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
 (ReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
 (ReciprocalFloat64x8 ...) => (VRCP14PD512 ...)
-(ReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ReciprocalSqrtFloat32x4 ...) => (VRSQRTPS128 ...)
 (ReciprocalSqrtFloat32x8 ...) => (VRSQRTPS256 ...)
 (ReciprocalSqrtFloat32x16 ...) => (VRSQRT14PS512 ...)
 (ReciprocalSqrtFloat64x2 ...) => (VRSQRT14PD128 ...)
 (ReciprocalSqrtFloat64x4 ...) => (VRSQRT14PD256 ...)
 (ReciprocalSqrtFloat64x8 ...) => (VRSQRT14PD512 ...)
-(ReciprocalSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateAllLeftInt32x4 ...) => (VPROLD128 ...)
 (RotateAllLeftInt32x8 ...) => (VPROLD256 ...)
 (RotateAllLeftInt32x16 ...) => (VPROLD512 ...)
 (RotateAllLeftUint64x2 ...) => (VPROLQ128 ...)
 (RotateAllLeftUint64x4 ...) => (VPROLQ256 ...)
 (RotateAllLeftUint64x8 ...) => (VPROLQ512 ...)
-(RotateAllLeftMaskedInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateAllRightInt32x4 ...) => (VPRORD128 ...)
 (RotateAllRightInt32x8 ...) => (VPRORD256 ...)
 (RotateAllRightInt32x16 ...) => (VPRORD512 ...)
 (RotateAllRightUint64x2 ...) => (VPRORQ128 ...)
 (RotateAllRightUint64x4 ...) => (VPRORQ256 ...)
 (RotateAllRightUint64x8 ...) => (VPRORQ512 ...)
-(RotateAllRightMaskedInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateLeftInt32x4 ...) => (VPROLVD128 ...)
 (RotateLeftInt32x8 ...) => (VPROLVD256 ...)
 (RotateLeftInt32x16 ...) => (VPROLVD512 ...)
 (RotateLeftUint64x2 ...) => (VPROLVQ128 ...)
 (RotateLeftUint64x4 ...) => (VPROLVQ256 ...)
 (RotateLeftUint64x8 ...) => (VPROLVQ512 ...)
-(RotateLeftMaskedInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateRightInt32x4 ...) => (VPRORVD128 ...)
 (RotateRightInt32x8 ...) => (VPRORVD256 ...)
 (RotateRightInt32x16 ...) => (VPRORVD512 ...)
 (RotateRightUint64x2 ...) => (VPRORVQ128 ...)
 (RotateRightUint64x4 ...) => (VPRORVQ256 ...)
 (RotateRightUint64x8 ...) => (VPRORVQ512 ...)
-(RotateRightMaskedInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateRightMaskedInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateRightMaskedInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateRightMaskedInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateRightMaskedInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateRightMaskedInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateRightMaskedUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateRightMaskedUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateRightMaskedUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RoundToEvenFloat32x4 x) => (VROUNDPS128 [0] x)
 (RoundToEvenFloat32x8 x) => (VROUNDPS256 [0] x)
 (RoundToEvenFloat64x2 x) => (VROUNDPD128 [0] x)
 (RoundToEvenScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
 (RoundToEvenScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
 (RoundToEvenScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
-(RoundToEvenScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RoundToEvenScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
 (RoundToEvenScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
 (RoundToEvenScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
 (RoundToEvenScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
 (RoundToEvenScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
 (RoundToEvenScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
-(RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
 (ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
 (ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
 (ScaleFloat64x2 ...) => (VSCALEFPD128 ...)
 (ScaleFloat64x4 ...) => (VSCALEFPD256 ...)
 (ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
-(ScaleMaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ScaleMaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ScaleMaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SetElemFloat32x4 ...) => (VPINSRD128 ...)
 (SetElemFloat64x2 ...) => (VPINSRQ128 ...)
 (SetElemInt8x16 ...) => (VPINSRB128 ...)
 (ShiftAllLeftConcatUint64x2 ...) => (VPSHLDQ128 ...)
 (ShiftAllLeftConcatUint64x4 ...) => (VPSHLDQ256 ...)
 (ShiftAllLeftConcatUint64x8 ...) => (VPSHLDQ512 ...)
-(ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(VPSLLWMasked128 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPSLLWMasked256 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPSLLWMasked512 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(VPSLLDMasked128 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPSLLDMasked256 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPSLLDMasked512 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(VPSLLQMasked128 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPSLLQMasked256 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPSLLQMasked512 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllRightInt16x8 ...) => (VPSRAW128 ...)
 (VPSRAW128 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x)
 (ShiftAllRightInt16x16 ...) => (VPSRAW256 ...)
 (ShiftAllRightConcatUint64x2 ...) => (VPSHRDQ128 ...)
 (ShiftAllRightConcatUint64x4 ...) => (VPSHRDQ256 ...)
 (ShiftAllRightConcatUint64x8 ...) => (VPSHRDQ512 ...)
-(ShiftAllRightConcatMaskedInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(VPSRAWMasked128 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPSRAWMasked256 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPSRAWMasked512 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(VPSRADMasked128 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPSRADMasked256 x (MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPSRADMasked512 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
 (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
 (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...)
 (ShiftLeftConcatUint64x2 ...) => (VPSHLDVQ128 ...)
 (ShiftLeftConcatUint64x4 ...) => (VPSHLDVQ256 ...)
 (ShiftLeftConcatUint64x8 ...) => (VPSHLDVQ512 ...)
-(ShiftLeftConcatMaskedInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftRightInt16x8 ...) => (VPSRAVW128 ...)
 (ShiftRightInt16x16 ...) => (VPSRAVW256 ...)
 (ShiftRightInt16x32 ...) => (VPSRAVW512 ...)
 (ShiftRightConcatUint64x2 ...) => (VPSHRDVQ128 ...)
 (ShiftRightConcatUint64x4 ...) => (VPSHRDVQ256 ...)
 (ShiftRightConcatUint64x8 ...) => (VPSHRDVQ512 ...)
-(ShiftRightConcatMaskedInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SqrtFloat32x4 ...) => (VSQRTPS128 ...)
 (SqrtFloat32x8 ...) => (VSQRTPS256 ...)
 (SqrtFloat32x16 ...) => (VSQRTPS512 ...)
 (SqrtFloat64x2 ...) => (VSQRTPD128 ...)
 (SqrtFloat64x4 ...) => (VSQRTPD256 ...)
 (SqrtFloat64x8 ...) => (VSQRTPD512 ...)
-(SqrtMaskedFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SqrtMaskedFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SqrtMaskedFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SqrtMaskedFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SqrtMaskedFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SqrtMaskedFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SubFloat32x4 ...) => (VSUBPS128 ...)
 (SubFloat32x8 ...) => (VSUBPS256 ...)
 (SubFloat32x16 ...) => (VSUBPS512 ...)
 (SubUint64x2 ...) => (VPSUBQ128 ...)
 (SubUint64x4 ...) => (VPSUBQ256 ...)
 (SubUint64x8 ...) => (VPSUBQ512 ...)
-(SubMaskedFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SubMaskedFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SubMaskedFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SubMaskedFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SubMaskedFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SubMaskedFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(SubMaskedInt8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubMaskedInt8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubMaskedInt8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubMaskedInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubMaskedInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubMaskedInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SubMaskedInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SubMaskedInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SubMaskedInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SubMaskedInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SubMaskedInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SubMaskedInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(SubMaskedUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubMaskedUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubMaskedUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubMaskedUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubMaskedUint16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubMaskedUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SubMaskedUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SubMaskedUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SubMaskedUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SubMaskedUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SubMaskedUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SubMaskedUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SubPairsFloat32x4 ...) => (VHSUBPS128 ...)
 (SubPairsFloat32x8 ...) => (VHSUBPS256 ...)
 (SubPairsFloat64x2 ...) => (VHSUBPD128 ...)
 (SubSaturatedUint16x8 ...) => (VPSUBUSW128 ...)
 (SubSaturatedUint16x16 ...) => (VPSUBUSW256 ...)
 (SubSaturatedUint16x32 ...) => (VPSUBUSW512 ...)
-(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (TruncFloat32x4 x) => (VROUNDPS128 [3] x)
 (TruncFloat32x8 x) => (VROUNDPS256 [3] x)
 (TruncFloat64x2 x) => (VROUNDPD128 [3] x)
 (TruncScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
 (TruncScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
 (TruncScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
-(TruncScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (TruncScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
 (TruncScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
 (TruncScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
 (TruncScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
 (TruncScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
 (TruncScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
-(TruncScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (XorInt8x16 ...) => (VPXOR128 ...)
 (XorInt8x32 ...) => (VPXOR256 ...)
 (XorInt8x64 ...) => (VPXORD512 ...)
 (XorUint64x2 ...) => (VPXOR128 ...)
 (XorUint64x4 ...) => (VPXOR256 ...)
 (XorUint64x8 ...) => (VPXORQ512 ...)
-(XorMaskedInt32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(XorMaskedInt32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(XorMaskedInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(XorMaskedInt64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(XorMaskedInt64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(XorMaskedInt64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(XorMaskedUint32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(XorMaskedUint32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(XorMaskedUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(XorMaskedUint64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(XorMaskedUint64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(XorMaskedUint64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (blendInt8x16 ...) => (VPBLENDVB128 ...)
 (blendInt8x32 ...) => (VPBLENDVB256 ...)
 (blendMaskedInt8x64 x y mask) => (VPBLENDMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
index d98c0d8152acef16dba65527de3344f6e159e35b..08bfe36951154b51f0c8744de599511aff3c1c41 100644 (file)
@@ -16,36 +16,15 @@ func simdGenericOps() []opData {
                {name: "AbsInt64x2", argLength: 1, commutative: false},
                {name: "AbsInt64x4", argLength: 1, commutative: false},
                {name: "AbsInt64x8", argLength: 1, commutative: false},
-               {name: "AbsMaskedInt8x16", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt8x32", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt8x64", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt16x8", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt16x16", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt16x32", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt32x4", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt32x8", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt32x16", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt64x2", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt64x4", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt64x8", argLength: 2, commutative: false},
                {name: "AddDotProdPairsSaturatedInt32x4", argLength: 3, commutative: false},
                {name: "AddDotProdPairsSaturatedInt32x8", argLength: 3, commutative: false},
                {name: "AddDotProdPairsSaturatedInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdPairsSaturatedMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "AddDotProdPairsSaturatedMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "AddDotProdPairsSaturatedMaskedInt32x16", argLength: 4, commutative: false},
                {name: "AddDotProdQuadrupleInt32x4", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleInt32x8", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleMaskedInt32x16", argLength: 4, commutative: false},
                {name: "AddDotProdQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", argLength: 4, commutative: false},
                {name: "AddFloat32x4", argLength: 2, commutative: true},
                {name: "AddFloat32x8", argLength: 2, commutative: true},
                {name: "AddFloat32x16", argLength: 2, commutative: true},
@@ -64,36 +43,6 @@ func simdGenericOps() []opData {
                {name: "AddInt64x2", argLength: 2, commutative: true},
                {name: "AddInt64x4", argLength: 2, commutative: true},
                {name: "AddInt64x8", argLength: 2, commutative: true},
-               {name: "AddMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "AddMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "AddMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "AddMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "AddMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "AddMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "AddMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "AddMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "AddMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "AddMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "AddMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "AddMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "AddMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "AddMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "AddMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "AddMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "AddMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "AddMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "AddMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "AddMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "AddMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "AddMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "AddMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "AddMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "AddMaskedUint64x8", argLength: 3, commutative: true},
                {name: "AddPairsFloat32x4", argLength: 2, commutative: false},
                {name: "AddPairsFloat32x8", argLength: 2, commutative: false},
                {name: "AddPairsFloat64x2", argLength: 2, commutative: false},
@@ -114,18 +63,6 @@ func simdGenericOps() []opData {
                {name: "AddSaturatedInt16x8", argLength: 2, commutative: true},
                {name: "AddSaturatedInt16x16", argLength: 2, commutative: true},
                {name: "AddSaturatedInt16x32", argLength: 2, commutative: true},
-               {name: "AddSaturatedMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint16x32", argLength: 3, commutative: true},
                {name: "AddSaturatedUint8x16", argLength: 2, commutative: true},
                {name: "AddSaturatedUint8x32", argLength: 2, commutative: true},
                {name: "AddSaturatedUint8x64", argLength: 2, commutative: true},
@@ -160,18 +97,6 @@ func simdGenericOps() []opData {
                {name: "AndInt64x2", argLength: 2, commutative: true},
                {name: "AndInt64x4", argLength: 2, commutative: true},
                {name: "AndInt64x8", argLength: 2, commutative: true},
-               {name: "AndMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "AndMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "AndMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "AndMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "AndMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "AndMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "AndMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "AndMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "AndMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "AndMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "AndMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "AndMaskedUint64x8", argLength: 3, commutative: true},
                {name: "AndNotInt8x16", argLength: 2, commutative: false},
                {name: "AndNotInt8x32", argLength: 2, commutative: false},
                {name: "AndNotInt8x64", argLength: 2, commutative: false},
@@ -184,18 +109,6 @@ func simdGenericOps() []opData {
                {name: "AndNotInt64x2", argLength: 2, commutative: false},
                {name: "AndNotInt64x4", argLength: 2, commutative: false},
                {name: "AndNotInt64x8", argLength: 2, commutative: false},
-               {name: "AndNotMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint64x8", argLength: 3, commutative: false},
                {name: "AndNotUint8x16", argLength: 2, commutative: false},
                {name: "AndNotUint8x32", argLength: 2, commutative: false},
                {name: "AndNotUint8x64", argLength: 2, commutative: false},
@@ -220,12 +133,6 @@ func simdGenericOps() []opData {
                {name: "AndUint64x2", argLength: 2, commutative: true},
                {name: "AndUint64x4", argLength: 2, commutative: true},
                {name: "AndUint64x8", argLength: 2, commutative: true},
-               {name: "AverageMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint16x32", argLength: 3, commutative: true},
                {name: "AverageUint8x16", argLength: 2, commutative: true},
                {name: "AverageUint8x32", argLength: 2, commutative: true},
                {name: "AverageUint8x64", argLength: 2, commutative: true},
@@ -238,16 +145,6 @@ func simdGenericOps() []opData {
                {name: "Broadcast128Int16x8", argLength: 1, commutative: false},
                {name: "Broadcast128Int32x4", argLength: 1, commutative: false},
                {name: "Broadcast128Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast128MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt8x16", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt16x8", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt32x4", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt64x2", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint8x16", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint16x8", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint32x4", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint64x2", argLength: 2, commutative: false},
                {name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
                {name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
                {name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
@@ -258,16 +155,6 @@ func simdGenericOps() []opData {
                {name: "Broadcast256Int16x8", argLength: 1, commutative: false},
                {name: "Broadcast256Int32x4", argLength: 1, commutative: false},
                {name: "Broadcast256Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast256MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt8x16", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt16x8", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt32x4", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt64x2", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint8x16", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint16x8", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint32x4", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint64x2", argLength: 2, commutative: false},
                {name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
                {name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
                {name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
@@ -278,16 +165,6 @@ func simdGenericOps() []opData {
                {name: "Broadcast512Int16x8", argLength: 1, commutative: false},
                {name: "Broadcast512Int32x4", argLength: 1, commutative: false},
                {name: "Broadcast512Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast512MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt8x16", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt16x8", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt32x4", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt64x2", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint8x16", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint16x8", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint32x4", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint64x2", argLength: 2, commutative: false},
                {name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
                {name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
                {name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
@@ -329,15 +206,9 @@ func simdGenericOps() []opData {
                {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
                {name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
                {name: "ConvertToInt32Float32x16", argLength: 1, commutative: false},
-               {name: "ConvertToInt32MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ConvertToInt32MaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ConvertToInt32MaskedFloat32x16", argLength: 2, commutative: false},
                {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false},
                {name: "ConvertToUint32Float32x8", argLength: 1, commutative: false},
                {name: "ConvertToUint32Float32x16", argLength: 1, commutative: false},
-               {name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false},
                {name: "CopySignInt8x16", argLength: 2, commutative: false},
                {name: "CopySignInt8x32", argLength: 2, commutative: false},
                {name: "CopySignInt16x8", argLength: 2, commutative: false},
@@ -350,21 +221,9 @@ func simdGenericOps() []opData {
                {name: "DivFloat64x2", argLength: 2, commutative: false},
                {name: "DivFloat64x4", argLength: 2, commutative: false},
                {name: "DivFloat64x8", argLength: 2, commutative: false},
-               {name: "DivMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat64x8", argLength: 3, commutative: false},
                {name: "DotProdPairsInt16x8", argLength: 2, commutative: false},
                {name: "DotProdPairsInt16x16", argLength: 2, commutative: false},
                {name: "DotProdPairsInt16x32", argLength: 2, commutative: false},
-               {name: "DotProdPairsMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "DotProdPairsMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "DotProdPairsMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "DotProdPairsSaturatedMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "DotProdPairsSaturatedMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "DotProdPairsSaturatedMaskedUint8x64", argLength: 3, commutative: false},
                {name: "DotProdPairsSaturatedUint8x16", argLength: 2, commutative: false},
                {name: "DotProdPairsSaturatedUint8x32", argLength: 2, commutative: false},
                {name: "DotProdPairsSaturatedUint8x64", argLength: 2, commutative: false},
@@ -386,36 +245,6 @@ func simdGenericOps() []opData {
                {name: "EqualInt64x2", argLength: 2, commutative: true},
                {name: "EqualInt64x4", argLength: 2, commutative: true},
                {name: "EqualInt64x8", argLength: 2, commutative: true},
-               {name: "EqualMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint64x8", argLength: 3, commutative: true},
                {name: "EqualUint8x16", argLength: 2, commutative: true},
                {name: "EqualUint8x32", argLength: 2, commutative: true},
                {name: "EqualUint8x64", argLength: 2, commutative: true},
@@ -462,9 +291,6 @@ func simdGenericOps() []opData {
                {name: "FloorFloat32x8", argLength: 1, commutative: false},
                {name: "FloorFloat64x2", argLength: 1, commutative: false},
                {name: "FloorFloat64x4", argLength: 1, commutative: false},
-               {name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false},
                {name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false},
                {name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false},
                {name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false},
@@ -518,36 +344,6 @@ func simdGenericOps() []opData {
                {name: "GreaterEqualInt16x32", argLength: 2, commutative: false},
                {name: "GreaterEqualInt32x16", argLength: 2, commutative: false},
                {name: "GreaterEqualInt64x8", argLength: 2, commutative: false},
-               {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false},
                {name: "GreaterEqualUint8x64", argLength: 2, commutative: false},
                {name: "GreaterEqualUint16x32", argLength: 2, commutative: false},
                {name: "GreaterEqualUint32x16", argLength: 2, commutative: false},
@@ -570,36 +366,6 @@ func simdGenericOps() []opData {
                {name: "GreaterInt64x2", argLength: 2, commutative: false},
                {name: "GreaterInt64x4", argLength: 2, commutative: false},
                {name: "GreaterInt64x8", argLength: 2, commutative: false},
-               {name: "GreaterMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint64x8", argLength: 3, commutative: false},
                {name: "GreaterUint8x64", argLength: 2, commutative: false},
                {name: "GreaterUint16x32", argLength: 2, commutative: false},
                {name: "GreaterUint32x16", argLength: 2, commutative: false},
@@ -610,12 +376,6 @@ func simdGenericOps() []opData {
                {name: "IsNanFloat64x2", argLength: 2, commutative: true},
                {name: "IsNanFloat64x4", argLength: 2, commutative: true},
                {name: "IsNanFloat64x8", argLength: 2, commutative: true},
-               {name: "IsNanMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat64x8", argLength: 3, commutative: true},
                {name: "LessEqualFloat32x4", argLength: 2, commutative: false},
                {name: "LessEqualFloat32x8", argLength: 2, commutative: false},
                {name: "LessEqualFloat32x16", argLength: 2, commutative: false},
@@ -626,36 +386,6 @@ func simdGenericOps() []opData {
                {name: "LessEqualInt16x32", argLength: 2, commutative: false},
                {name: "LessEqualInt32x16", argLength: 2, commutative: false},
                {name: "LessEqualInt64x8", argLength: 2, commutative: false},
-               {name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false},
                {name: "LessEqualUint8x64", argLength: 2, commutative: false},
                {name: "LessEqualUint16x32", argLength: 2, commutative: false},
                {name: "LessEqualUint32x16", argLength: 2, commutative: false},
@@ -670,36 +400,6 @@ func simdGenericOps() []opData {
                {name: "LessInt16x32", argLength: 2, commutative: false},
                {name: "LessInt32x16", argLength: 2, commutative: false},
                {name: "LessInt64x8", argLength: 2, commutative: false},
-               {name: "LessMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "LessMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "LessMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "LessMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "LessMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "LessMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "LessMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "LessMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "LessMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "LessMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "LessMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "LessMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "LessMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "LessMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "LessMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "LessMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "LessMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "LessMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "LessMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "LessMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "LessMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "LessMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "LessMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "LessMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "LessMaskedUint64x8", argLength: 3, commutative: false},
                {name: "LessUint8x64", argLength: 2, commutative: false},
                {name: "LessUint16x32", argLength: 2, commutative: false},
                {name: "LessUint32x16", argLength: 2, commutative: false},
@@ -722,36 +422,6 @@ func simdGenericOps() []opData {
                {name: "MaxInt64x2", argLength: 2, commutative: true},
                {name: "MaxInt64x4", argLength: 2, commutative: true},
                {name: "MaxInt64x8", argLength: 2, commutative: true},
-               {name: "MaxMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint64x8", argLength: 3, commutative: true},
                {name: "MaxUint8x16", argLength: 2, commutative: true},
                {name: "MaxUint8x32", argLength: 2, commutative: true},
                {name: "MaxUint8x64", argLength: 2, commutative: true},
@@ -782,36 +452,6 @@ func simdGenericOps() []opData {
                {name: "MinInt64x2", argLength: 2, commutative: true},
                {name: "MinInt64x4", argLength: 2, commutative: true},
                {name: "MinInt64x8", argLength: 2, commutative: true},
-               {name: "MinMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "MinMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "MinMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "MinMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "MinMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MinMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MinMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MinMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "MinMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "MinMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "MinMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "MinMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "MinMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "MinMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "MinMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "MinMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "MinMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MinMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MinMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "MinMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "MinMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "MinMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "MinMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "MinMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "MinMaskedUint64x8", argLength: 3, commutative: true},
                {name: "MinUint8x16", argLength: 2, commutative: true},
                {name: "MinUint8x32", argLength: 2, commutative: true},
                {name: "MinUint8x64", argLength: 2, commutative: true},
@@ -830,24 +470,12 @@ func simdGenericOps() []opData {
                {name: "MulAddFloat64x2", argLength: 3, commutative: false},
                {name: "MulAddFloat64x4", argLength: 3, commutative: false},
                {name: "MulAddFloat64x8", argLength: 3, commutative: false},
-               {name: "MulAddMaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat64x8", argLength: 4, commutative: false},
                {name: "MulAddSubFloat32x4", argLength: 3, commutative: false},
                {name: "MulAddSubFloat32x8", argLength: 3, commutative: false},
                {name: "MulAddSubFloat32x16", argLength: 3, commutative: false},
                {name: "MulAddSubFloat64x2", argLength: 3, commutative: false},
                {name: "MulAddSubFloat64x4", argLength: 3, commutative: false},
                {name: "MulAddSubFloat64x8", argLength: 3, commutative: false},
-               {name: "MulAddSubMaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat64x8", argLength: 4, commutative: false},
                {name: "MulEvenWidenInt32x4", argLength: 2, commutative: true},
                {name: "MulEvenWidenInt32x8", argLength: 2, commutative: true},
                {name: "MulEvenWidenUint32x4", argLength: 2, commutative: true},
@@ -861,12 +489,6 @@ func simdGenericOps() []opData {
                {name: "MulHighInt16x8", argLength: 2, commutative: true},
                {name: "MulHighInt16x16", argLength: 2, commutative: true},
                {name: "MulHighInt16x32", argLength: 2, commutative: true},
-               {name: "MulHighMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MulHighMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MulHighMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MulHighMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MulHighMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
                {name: "MulHighUint16x8", argLength: 2, commutative: true},
                {name: "MulHighUint16x16", argLength: 2, commutative: true},
                {name: "MulHighUint16x32", argLength: 2, commutative: true},
@@ -879,42 +501,12 @@ func simdGenericOps() []opData {
                {name: "MulInt64x2", argLength: 2, commutative: true},
                {name: "MulInt64x4", argLength: 2, commutative: true},
                {name: "MulInt64x8", argLength: 2, commutative: true},
-               {name: "MulMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "MulMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MulMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MulMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MulMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "MulMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "MulMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "MulMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "MulMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "MulMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "MulMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MulMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MulMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "MulMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "MulMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "MulMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "MulMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "MulMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "MulMaskedUint64x8", argLength: 3, commutative: true},
                {name: "MulSubAddFloat32x4", argLength: 3, commutative: false},
                {name: "MulSubAddFloat32x8", argLength: 3, commutative: false},
                {name: "MulSubAddFloat32x16", argLength: 3, commutative: false},
                {name: "MulSubAddFloat64x2", argLength: 3, commutative: false},
                {name: "MulSubAddFloat64x4", argLength: 3, commutative: false},
                {name: "MulSubAddFloat64x8", argLength: 3, commutative: false},
-               {name: "MulSubAddMaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat64x8", argLength: 4, commutative: false},
                {name: "MulUint16x8", argLength: 2, commutative: true},
                {name: "MulUint16x16", argLength: 2, commutative: true},
                {name: "MulUint16x32", argLength: 2, commutative: true},
@@ -934,36 +526,6 @@ func simdGenericOps() []opData {
                {name: "NotEqualInt16x32", argLength: 2, commutative: true},
                {name: "NotEqualInt32x16", argLength: 2, commutative: true},
                {name: "NotEqualInt64x8", argLength: 2, commutative: true},
-               {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
                {name: "NotEqualUint8x64", argLength: 2, commutative: true},
                {name: "NotEqualUint16x32", argLength: 2, commutative: true},
                {name: "NotEqualUint32x16", argLength: 2, commutative: true},
@@ -980,30 +542,6 @@ func simdGenericOps() []opData {
                {name: "OnesCountInt64x2", argLength: 1, commutative: false},
                {name: "OnesCountInt64x4", argLength: 1, commutative: false},
                {name: "OnesCountInt64x8", argLength: 1, commutative: false},
-               {name: "OnesCountMaskedInt8x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt8x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt8x64", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt16x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt16x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt16x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt32x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt32x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt32x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt64x2", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt64x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt64x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint8x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint8x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint8x64", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint16x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint16x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint16x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint32x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint32x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint32x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint64x2", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint64x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint64x8", argLength: 2, commutative: false},
                {name: "OnesCountUint8x16", argLength: 1, commutative: false},
                {name: "OnesCountUint8x32", argLength: 1, commutative: false},
                {name: "OnesCountUint8x64", argLength: 1, commutative: false},
@@ -1028,18 +566,6 @@ func simdGenericOps() []opData {
                {name: "OrInt64x2", argLength: 2, commutative: true},
                {name: "OrInt64x4", argLength: 2, commutative: true},
                {name: "OrInt64x8", argLength: 2, commutative: true},
-               {name: "OrMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "OrMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "OrMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "OrMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "OrMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "OrMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "OrMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "OrMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "OrMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "OrMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "OrMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "OrMaskedUint64x8", argLength: 3, commutative: true},
                {name: "OrUint8x16", argLength: 2, commutative: true},
                {name: "OrUint8x32", argLength: 2, commutative: true},
                {name: "OrUint8x64", argLength: 2, commutative: true},
@@ -1070,36 +596,6 @@ func simdGenericOps() []opData {
                {name: "Permute2Int64x2", argLength: 3, commutative: false},
                {name: "Permute2Int64x4", argLength: 3, commutative: false},
                {name: "Permute2Int64x8", argLength: 3, commutative: false},
-               {name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt8x64", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint8x64", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
                {name: "Permute2Uint8x16", argLength: 3, commutative: false},
                {name: "Permute2Uint8x32", argLength: 3, commutative: false},
                {name: "Permute2Uint8x64", argLength: 3, commutative: false},
@@ -1126,30 +622,6 @@ func simdGenericOps() []opData {
                {name: "PermuteInt32x16", argLength: 2, commutative: false},
                {name: "PermuteInt64x4", argLength: 2, commutative: false},
                {name: "PermuteInt64x8", argLength: 2, commutative: false},
-               {name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
                {name: "PermuteUint8x16", argLength: 2, commutative: false},
                {name: "PermuteUint8x32", argLength: 2, commutative: false},
                {name: "PermuteUint8x64", argLength: 2, commutative: false},
@@ -1166,42 +638,18 @@ func simdGenericOps() []opData {
                {name: "ReciprocalFloat64x2", argLength: 1, commutative: false},
                {name: "ReciprocalFloat64x4", argLength: 1, commutative: false},
                {name: "ReciprocalFloat64x8", argLength: 1, commutative: false},
-               {name: "ReciprocalMaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat64x4", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
                {name: "ReciprocalSqrtFloat32x4", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat32x8", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat32x16", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat64x2", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat64x4", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat64x8", argLength: 1, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat32x16", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat64x4", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat64x8", argLength: 2, commutative: false},
                {name: "RotateLeftInt32x4", argLength: 2, commutative: false},
                {name: "RotateLeftInt32x8", argLength: 2, commutative: false},
                {name: "RotateLeftInt32x16", argLength: 2, commutative: false},
                {name: "RotateLeftInt64x2", argLength: 2, commutative: false},
                {name: "RotateLeftInt64x4", argLength: 2, commutative: false},
                {name: "RotateLeftInt64x8", argLength: 2, commutative: false},
-               {name: "RotateLeftMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint64x8", argLength: 3, commutative: false},
                {name: "RotateLeftUint32x4", argLength: 2, commutative: false},
                {name: "RotateLeftUint32x8", argLength: 2, commutative: false},
                {name: "RotateLeftUint32x16", argLength: 2, commutative: false},
@@ -1214,18 +662,6 @@ func simdGenericOps() []opData {
                {name: "RotateRightInt64x2", argLength: 2, commutative: false},
                {name: "RotateRightInt64x4", argLength: 2, commutative: false},
                {name: "RotateRightInt64x8", argLength: 2, commutative: false},
-               {name: "RotateRightMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint64x8", argLength: 3, commutative: false},
                {name: "RotateRightUint32x4", argLength: 2, commutative: false},
                {name: "RotateRightUint32x8", argLength: 2, commutative: false},
                {name: "RotateRightUint32x16", argLength: 2, commutative: false},
@@ -1242,12 +678,6 @@ func simdGenericOps() []opData {
                {name: "ScaleFloat64x2", argLength: 2, commutative: false},
                {name: "ScaleFloat64x4", argLength: 2, commutative: false},
                {name: "ScaleFloat64x8", argLength: 2, commutative: false},
-               {name: "ScaleMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false},
                {name: "SetHiFloat32x8", argLength: 2, commutative: false},
                {name: "SetHiFloat32x16", argLength: 2, commutative: false},
                {name: "SetHiFloat64x4", argLength: 2, commutative: false},
@@ -1297,24 +727,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllLeftInt64x2", argLength: 2, commutative: false},
                {name: "ShiftAllLeftInt64x4", argLength: 2, commutative: false},
                {name: "ShiftAllLeftInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftAllLeftMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false},
                {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false},
                {name: "ShiftAllLeftUint16x32", argLength: 2, commutative: false},
@@ -1333,24 +745,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false},
                {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false},
                {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftAllRightMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false},
                {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false},
                {name: "ShiftAllRightUint16x32", argLength: 2, commutative: false},
@@ -1369,24 +763,6 @@ func simdGenericOps() []opData {
                {name: "ShiftLeftConcatInt64x2", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatInt64x4", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt16x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt16x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt16x32", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt32x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt64x2", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt64x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt64x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint16x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint16x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint16x32", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint32x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint32x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint32x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint64x2", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint64x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint64x8", argLength: 4, commutative: false},
                {name: "ShiftLeftConcatUint16x8", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatUint16x16", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatUint16x32", argLength: 3, commutative: false},
@@ -1405,24 +781,6 @@ func simdGenericOps() []opData {
                {name: "ShiftLeftInt64x2", argLength: 2, commutative: false},
                {name: "ShiftLeftInt64x4", argLength: 2, commutative: false},
                {name: "ShiftLeftInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftLeftMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftLeftUint16x8", argLength: 2, commutative: false},
                {name: "ShiftLeftUint16x16", argLength: 2, commutative: false},
                {name: "ShiftLeftUint16x32", argLength: 2, commutative: false},
@@ -1441,24 +799,6 @@ func simdGenericOps() []opData {
                {name: "ShiftRightConcatInt64x2", argLength: 3, commutative: false},
                {name: "ShiftRightConcatInt64x4", argLength: 3, commutative: false},
                {name: "ShiftRightConcatInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftRightConcatMaskedInt16x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt16x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt16x32", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt32x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt64x2", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt64x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt64x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint16x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint16x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint16x32", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint32x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint32x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint32x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint64x2", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint64x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint64x8", argLength: 4, commutative: false},
                {name: "ShiftRightConcatUint16x8", argLength: 3, commutative: false},
                {name: "ShiftRightConcatUint16x16", argLength: 3, commutative: false},
                {name: "ShiftRightConcatUint16x32", argLength: 3, commutative: false},
@@ -1477,24 +817,6 @@ func simdGenericOps() []opData {
                {name: "ShiftRightInt64x2", argLength: 2, commutative: false},
                {name: "ShiftRightInt64x4", argLength: 2, commutative: false},
                {name: "ShiftRightInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftRightUint16x8", argLength: 2, commutative: false},
                {name: "ShiftRightUint16x16", argLength: 2, commutative: false},
                {name: "ShiftRightUint16x32", argLength: 2, commutative: false},
@@ -1510,12 +832,6 @@ func simdGenericOps() []opData {
                {name: "SqrtFloat64x2", argLength: 1, commutative: false},
                {name: "SqrtFloat64x4", argLength: 1, commutative: false},
                {name: "SqrtFloat64x8", argLength: 1, commutative: false},
-               {name: "SqrtMaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat32x16", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat64x4", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat64x8", argLength: 2, commutative: false},
                {name: "SubFloat32x4", argLength: 2, commutative: false},
                {name: "SubFloat32x8", argLength: 2, commutative: false},
                {name: "SubFloat32x16", argLength: 2, commutative: false},
@@ -1534,36 +850,6 @@ func simdGenericOps() []opData {
                {name: "SubInt64x2", argLength: 2, commutative: false},
                {name: "SubInt64x4", argLength: 2, commutative: false},
                {name: "SubInt64x8", argLength: 2, commutative: false},
-               {name: "SubMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "SubMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "SubMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "SubMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "SubMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "SubMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "SubMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "SubMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "SubMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "SubMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "SubMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "SubMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "SubMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "SubMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "SubMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "SubMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "SubMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "SubMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "SubMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "SubMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "SubMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "SubMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "SubMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "SubMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "SubMaskedUint64x8", argLength: 3, commutative: false},
                {name: "SubPairsFloat32x4", argLength: 2, commutative: false},
                {name: "SubPairsFloat32x8", argLength: 2, commutative: false},
                {name: "SubPairsFloat64x2", argLength: 2, commutative: false},
@@ -1584,18 +870,6 @@ func simdGenericOps() []opData {
                {name: "SubSaturatedInt16x8", argLength: 2, commutative: false},
                {name: "SubSaturatedInt16x16", argLength: 2, commutative: false},
                {name: "SubSaturatedInt16x32", argLength: 2, commutative: false},
-               {name: "SubSaturatedMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint16x32", argLength: 3, commutative: false},
                {name: "SubSaturatedUint8x16", argLength: 2, commutative: false},
                {name: "SubSaturatedUint8x32", argLength: 2, commutative: false},
                {name: "SubSaturatedUint8x64", argLength: 2, commutative: false},
@@ -1630,18 +904,6 @@ func simdGenericOps() []opData {
                {name: "XorInt64x2", argLength: 2, commutative: true},
                {name: "XorInt64x4", argLength: 2, commutative: true},
                {name: "XorInt64x8", argLength: 2, commutative: true},
-               {name: "XorMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "XorMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "XorMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "XorMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "XorMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "XorMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "XorMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "XorMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "XorMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "XorMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "XorMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "XorMaskedUint64x8", argLength: 3, commutative: true},
                {name: "XorUint8x16", argLength: 2, commutative: true},
                {name: "XorUint8x32", argLength: 2, commutative: true},
                {name: "XorUint8x64", argLength: 2, commutative: true},
@@ -1666,57 +928,27 @@ func simdGenericOps() []opData {
                {name: "CeilScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformInverseMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformInverseMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformInverseMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformInverseUint8x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformInverseUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformInverseUint8x64", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1736,18 +968,6 @@ func simdGenericOps() []opData {
                {name: "RotateAllLeftInt64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftInt64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftUint32x16", argLength: 1, commutative: false, aux: "UInt8"},
@@ -1760,18 +980,6 @@ func simdGenericOps() []opData {
                {name: "RotateAllRightInt64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightInt64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightUint32x16", argLength: 1, commutative: false, aux: "UInt8"},
@@ -1784,24 +992,12 @@ func simdGenericOps() []opData {
                {name: "RoundToEvenScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1821,24 +1017,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllLeftConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1857,24 +1035,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllRightConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1890,23 +1050,11 @@ func simdGenericOps() []opData {
                {name: "TruncScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
        }
 }
index b45cccd96bbb22238a6f75cf1da6ee9e23eadf47..9f6e10c95cbb99dc3389d86e379e49f7e5bcdeec 100644 (file)
@@ -4648,36 +4648,15 @@ const (
        OpAbsInt64x2
        OpAbsInt64x4
        OpAbsInt64x8
-       OpAbsMaskedInt8x16
-       OpAbsMaskedInt8x32
-       OpAbsMaskedInt8x64
-       OpAbsMaskedInt16x8
-       OpAbsMaskedInt16x16
-       OpAbsMaskedInt16x32
-       OpAbsMaskedInt32x4
-       OpAbsMaskedInt32x8
-       OpAbsMaskedInt32x16
-       OpAbsMaskedInt64x2
-       OpAbsMaskedInt64x4
-       OpAbsMaskedInt64x8
        OpAddDotProdPairsSaturatedInt32x4
        OpAddDotProdPairsSaturatedInt32x8
        OpAddDotProdPairsSaturatedInt32x16
-       OpAddDotProdPairsSaturatedMaskedInt32x4
-       OpAddDotProdPairsSaturatedMaskedInt32x8
-       OpAddDotProdPairsSaturatedMaskedInt32x16
        OpAddDotProdQuadrupleInt32x4
        OpAddDotProdQuadrupleInt32x8
        OpAddDotProdQuadrupleInt32x16
-       OpAddDotProdQuadrupleMaskedInt32x4
-       OpAddDotProdQuadrupleMaskedInt32x8
-       OpAddDotProdQuadrupleMaskedInt32x16
        OpAddDotProdQuadrupleSaturatedInt32x4
        OpAddDotProdQuadrupleSaturatedInt32x8
        OpAddDotProdQuadrupleSaturatedInt32x16
-       OpAddDotProdQuadrupleSaturatedMaskedInt32x4
-       OpAddDotProdQuadrupleSaturatedMaskedInt32x8
-       OpAddDotProdQuadrupleSaturatedMaskedInt32x16
        OpAddFloat32x4
        OpAddFloat32x8
        OpAddFloat32x16
@@ -4696,36 +4675,6 @@ const (
        OpAddInt64x2
        OpAddInt64x4
        OpAddInt64x8
-       OpAddMaskedFloat32x4
-       OpAddMaskedFloat32x8
-       OpAddMaskedFloat32x16
-       OpAddMaskedFloat64x2
-       OpAddMaskedFloat64x4
-       OpAddMaskedFloat64x8
-       OpAddMaskedInt8x16
-       OpAddMaskedInt8x32
-       OpAddMaskedInt8x64
-       OpAddMaskedInt16x8
-       OpAddMaskedInt16x16
-       OpAddMaskedInt16x32
-       OpAddMaskedInt32x4
-       OpAddMaskedInt32x8
-       OpAddMaskedInt32x16
-       OpAddMaskedInt64x2
-       OpAddMaskedInt64x4
-       OpAddMaskedInt64x8
-       OpAddMaskedUint8x16
-       OpAddMaskedUint8x32
-       OpAddMaskedUint8x64
-       OpAddMaskedUint16x8
-       OpAddMaskedUint16x16
-       OpAddMaskedUint16x32
-       OpAddMaskedUint32x4
-       OpAddMaskedUint32x8
-       OpAddMaskedUint32x16
-       OpAddMaskedUint64x2
-       OpAddMaskedUint64x4
-       OpAddMaskedUint64x8
        OpAddPairsFloat32x4
        OpAddPairsFloat32x8
        OpAddPairsFloat64x2
@@ -4746,18 +4695,6 @@ const (
        OpAddSaturatedInt16x8
        OpAddSaturatedInt16x16
        OpAddSaturatedInt16x32
-       OpAddSaturatedMaskedInt8x16
-       OpAddSaturatedMaskedInt8x32
-       OpAddSaturatedMaskedInt8x64
-       OpAddSaturatedMaskedInt16x8
-       OpAddSaturatedMaskedInt16x16
-       OpAddSaturatedMaskedInt16x32
-       OpAddSaturatedMaskedUint8x16
-       OpAddSaturatedMaskedUint8x32
-       OpAddSaturatedMaskedUint8x64
-       OpAddSaturatedMaskedUint16x8
-       OpAddSaturatedMaskedUint16x16
-       OpAddSaturatedMaskedUint16x32
        OpAddSaturatedUint8x16
        OpAddSaturatedUint8x32
        OpAddSaturatedUint8x64
@@ -4792,18 +4729,6 @@ const (
        OpAndInt64x2
        OpAndInt64x4
        OpAndInt64x8
-       OpAndMaskedInt32x4
-       OpAndMaskedInt32x8
-       OpAndMaskedInt32x16
-       OpAndMaskedInt64x2
-       OpAndMaskedInt64x4
-       OpAndMaskedInt64x8
-       OpAndMaskedUint32x4
-       OpAndMaskedUint32x8
-       OpAndMaskedUint32x16
-       OpAndMaskedUint64x2
-       OpAndMaskedUint64x4
-       OpAndMaskedUint64x8
        OpAndNotInt8x16
        OpAndNotInt8x32
        OpAndNotInt8x64
@@ -4816,18 +4741,6 @@ const (
        OpAndNotInt64x2
        OpAndNotInt64x4
        OpAndNotInt64x8
-       OpAndNotMaskedInt32x4
-       OpAndNotMaskedInt32x8
-       OpAndNotMaskedInt32x16
-       OpAndNotMaskedInt64x2
-       OpAndNotMaskedInt64x4
-       OpAndNotMaskedInt64x8
-       OpAndNotMaskedUint32x4
-       OpAndNotMaskedUint32x8
-       OpAndNotMaskedUint32x16
-       OpAndNotMaskedUint64x2
-       OpAndNotMaskedUint64x4
-       OpAndNotMaskedUint64x8
        OpAndNotUint8x16
        OpAndNotUint8x32
        OpAndNotUint8x64
@@ -4852,12 +4765,6 @@ const (
        OpAndUint64x2
        OpAndUint64x4
        OpAndUint64x8
-       OpAverageMaskedUint8x16
-       OpAverageMaskedUint8x32
-       OpAverageMaskedUint8x64
-       OpAverageMaskedUint16x8
-       OpAverageMaskedUint16x16
-       OpAverageMaskedUint16x32
        OpAverageUint8x16
        OpAverageUint8x32
        OpAverageUint8x64
@@ -4870,16 +4777,6 @@ const (
        OpBroadcast128Int16x8
        OpBroadcast128Int32x4
        OpBroadcast128Int64x2
-       OpBroadcast128MaskedFloat32x4
-       OpBroadcast128MaskedFloat64x2
-       OpBroadcast128MaskedInt8x16
-       OpBroadcast128MaskedInt16x8
-       OpBroadcast128MaskedInt32x4
-       OpBroadcast128MaskedInt64x2
-       OpBroadcast128MaskedUint8x16
-       OpBroadcast128MaskedUint16x8
-       OpBroadcast128MaskedUint32x4
-       OpBroadcast128MaskedUint64x2
        OpBroadcast128Uint8x16
        OpBroadcast128Uint16x8
        OpBroadcast128Uint32x4
@@ -4890,16 +4787,6 @@ const (
        OpBroadcast256Int16x8
        OpBroadcast256Int32x4
        OpBroadcast256Int64x2
-       OpBroadcast256MaskedFloat32x4
-       OpBroadcast256MaskedFloat64x2
-       OpBroadcast256MaskedInt8x16
-       OpBroadcast256MaskedInt16x8
-       OpBroadcast256MaskedInt32x4
-       OpBroadcast256MaskedInt64x2
-       OpBroadcast256MaskedUint8x16
-       OpBroadcast256MaskedUint16x8
-       OpBroadcast256MaskedUint32x4
-       OpBroadcast256MaskedUint64x2
        OpBroadcast256Uint8x16
        OpBroadcast256Uint16x8
        OpBroadcast256Uint32x4
@@ -4910,16 +4797,6 @@ const (
        OpBroadcast512Int16x8
        OpBroadcast512Int32x4
        OpBroadcast512Int64x2
-       OpBroadcast512MaskedFloat32x4
-       OpBroadcast512MaskedFloat64x2
-       OpBroadcast512MaskedInt8x16
-       OpBroadcast512MaskedInt16x8
-       OpBroadcast512MaskedInt32x4
-       OpBroadcast512MaskedInt64x2
-       OpBroadcast512MaskedUint8x16
-       OpBroadcast512MaskedUint16x8
-       OpBroadcast512MaskedUint32x4
-       OpBroadcast512MaskedUint64x2
        OpBroadcast512Uint8x16
        OpBroadcast512Uint16x8
        OpBroadcast512Uint32x4
@@ -4961,15 +4838,9 @@ const (
        OpConvertToInt32Float32x4
        OpConvertToInt32Float32x8
        OpConvertToInt32Float32x16
-       OpConvertToInt32MaskedFloat32x4
-       OpConvertToInt32MaskedFloat32x8
-       OpConvertToInt32MaskedFloat32x16
        OpConvertToUint32Float32x4
        OpConvertToUint32Float32x8
        OpConvertToUint32Float32x16
-       OpConvertToUint32MaskedFloat32x4
-       OpConvertToUint32MaskedFloat32x8
-       OpConvertToUint32MaskedFloat32x16
        OpCopySignInt8x16
        OpCopySignInt8x32
        OpCopySignInt16x8
@@ -4982,21 +4853,9 @@ const (
        OpDivFloat64x2
        OpDivFloat64x4
        OpDivFloat64x8
-       OpDivMaskedFloat32x4
-       OpDivMaskedFloat32x8
-       OpDivMaskedFloat32x16
-       OpDivMaskedFloat64x2
-       OpDivMaskedFloat64x4
-       OpDivMaskedFloat64x8
        OpDotProdPairsInt16x8
        OpDotProdPairsInt16x16
        OpDotProdPairsInt16x32
-       OpDotProdPairsMaskedInt16x8
-       OpDotProdPairsMaskedInt16x16
-       OpDotProdPairsMaskedInt16x32
-       OpDotProdPairsSaturatedMaskedUint8x16
-       OpDotProdPairsSaturatedMaskedUint8x32
-       OpDotProdPairsSaturatedMaskedUint8x64
        OpDotProdPairsSaturatedUint8x16
        OpDotProdPairsSaturatedUint8x32
        OpDotProdPairsSaturatedUint8x64
@@ -5018,36 +4877,6 @@ const (
        OpEqualInt64x2
        OpEqualInt64x4
        OpEqualInt64x8
-       OpEqualMaskedFloat32x4
-       OpEqualMaskedFloat32x8
-       OpEqualMaskedFloat32x16
-       OpEqualMaskedFloat64x2
-       OpEqualMaskedFloat64x4
-       OpEqualMaskedFloat64x8
-       OpEqualMaskedInt8x16
-       OpEqualMaskedInt8x32
-       OpEqualMaskedInt8x64
-       OpEqualMaskedInt16x8
-       OpEqualMaskedInt16x16
-       OpEqualMaskedInt16x32
-       OpEqualMaskedInt32x4
-       OpEqualMaskedInt32x8
-       OpEqualMaskedInt32x16
-       OpEqualMaskedInt64x2
-       OpEqualMaskedInt64x4
-       OpEqualMaskedInt64x8
-       OpEqualMaskedUint8x16
-       OpEqualMaskedUint8x32
-       OpEqualMaskedUint8x64
-       OpEqualMaskedUint16x8
-       OpEqualMaskedUint16x16
-       OpEqualMaskedUint16x32
-       OpEqualMaskedUint32x4
-       OpEqualMaskedUint32x8
-       OpEqualMaskedUint32x16
-       OpEqualMaskedUint64x2
-       OpEqualMaskedUint64x4
-       OpEqualMaskedUint64x8
        OpEqualUint8x16
        OpEqualUint8x32
        OpEqualUint8x64
@@ -5094,9 +4923,6 @@ const (
        OpFloorFloat32x8
        OpFloorFloat64x2
        OpFloorFloat64x4
-       OpGaloisFieldMulMaskedUint8x16
-       OpGaloisFieldMulMaskedUint8x32
-       OpGaloisFieldMulMaskedUint8x64
        OpGaloisFieldMulUint8x16
        OpGaloisFieldMulUint8x32
        OpGaloisFieldMulUint8x64
@@ -5150,36 +4976,6 @@ const (
        OpGreaterEqualInt16x32
        OpGreaterEqualInt32x16
        OpGreaterEqualInt64x8
-       OpGreaterEqualMaskedFloat32x4
-       OpGreaterEqualMaskedFloat32x8
-       OpGreaterEqualMaskedFloat32x16
-       OpGreaterEqualMaskedFloat64x2
-       OpGreaterEqualMaskedFloat64x4
-       OpGreaterEqualMaskedFloat64x8
-       OpGreaterEqualMaskedInt8x16
-       OpGreaterEqualMaskedInt8x32
-       OpGreaterEqualMaskedInt8x64
-       OpGreaterEqualMaskedInt16x8
-       OpGreaterEqualMaskedInt16x16
-       OpGreaterEqualMaskedInt16x32
-       OpGreaterEqualMaskedInt32x4
-       OpGreaterEqualMaskedInt32x8
-       OpGreaterEqualMaskedInt32x16
-       OpGreaterEqualMaskedInt64x2
-       OpGreaterEqualMaskedInt64x4
-       OpGreaterEqualMaskedInt64x8
-       OpGreaterEqualMaskedUint8x16
-       OpGreaterEqualMaskedUint8x32
-       OpGreaterEqualMaskedUint8x64
-       OpGreaterEqualMaskedUint16x8
-       OpGreaterEqualMaskedUint16x16
-       OpGreaterEqualMaskedUint16x32
-       OpGreaterEqualMaskedUint32x4
-       OpGreaterEqualMaskedUint32x8
-       OpGreaterEqualMaskedUint32x16
-       OpGreaterEqualMaskedUint64x2
-       OpGreaterEqualMaskedUint64x4
-       OpGreaterEqualMaskedUint64x8
        OpGreaterEqualUint8x64
        OpGreaterEqualUint16x32
        OpGreaterEqualUint32x16
@@ -5202,36 +4998,6 @@ const (
        OpGreaterInt64x2
        OpGreaterInt64x4
        OpGreaterInt64x8
-       OpGreaterMaskedFloat32x4
-       OpGreaterMaskedFloat32x8
-       OpGreaterMaskedFloat32x16
-       OpGreaterMaskedFloat64x2
-       OpGreaterMaskedFloat64x4
-       OpGreaterMaskedFloat64x8
-       OpGreaterMaskedInt8x16
-       OpGreaterMaskedInt8x32
-       OpGreaterMaskedInt8x64
-       OpGreaterMaskedInt16x8
-       OpGreaterMaskedInt16x16
-       OpGreaterMaskedInt16x32
-       OpGreaterMaskedInt32x4
-       OpGreaterMaskedInt32x8
-       OpGreaterMaskedInt32x16
-       OpGreaterMaskedInt64x2
-       OpGreaterMaskedInt64x4
-       OpGreaterMaskedInt64x8
-       OpGreaterMaskedUint8x16
-       OpGreaterMaskedUint8x32
-       OpGreaterMaskedUint8x64
-       OpGreaterMaskedUint16x8
-       OpGreaterMaskedUint16x16
-       OpGreaterMaskedUint16x32
-       OpGreaterMaskedUint32x4
-       OpGreaterMaskedUint32x8
-       OpGreaterMaskedUint32x16
-       OpGreaterMaskedUint64x2
-       OpGreaterMaskedUint64x4
-       OpGreaterMaskedUint64x8
        OpGreaterUint8x64
        OpGreaterUint16x32
        OpGreaterUint32x16
@@ -5242,12 +5008,6 @@ const (
        OpIsNanFloat64x2
        OpIsNanFloat64x4
        OpIsNanFloat64x8
-       OpIsNanMaskedFloat32x4
-       OpIsNanMaskedFloat32x8
-       OpIsNanMaskedFloat32x16
-       OpIsNanMaskedFloat64x2
-       OpIsNanMaskedFloat64x4
-       OpIsNanMaskedFloat64x8
        OpLessEqualFloat32x4
        OpLessEqualFloat32x8
        OpLessEqualFloat32x16
@@ -5258,36 +5018,6 @@ const (
        OpLessEqualInt16x32
        OpLessEqualInt32x16
        OpLessEqualInt64x8
-       OpLessEqualMaskedFloat32x4
-       OpLessEqualMaskedFloat32x8
-       OpLessEqualMaskedFloat32x16
-       OpLessEqualMaskedFloat64x2
-       OpLessEqualMaskedFloat64x4
-       OpLessEqualMaskedFloat64x8
-       OpLessEqualMaskedInt8x16
-       OpLessEqualMaskedInt8x32
-       OpLessEqualMaskedInt8x64
-       OpLessEqualMaskedInt16x8
-       OpLessEqualMaskedInt16x16
-       OpLessEqualMaskedInt16x32
-       OpLessEqualMaskedInt32x4
-       OpLessEqualMaskedInt32x8
-       OpLessEqualMaskedInt32x16
-       OpLessEqualMaskedInt64x2
-       OpLessEqualMaskedInt64x4
-       OpLessEqualMaskedInt64x8
-       OpLessEqualMaskedUint8x16
-       OpLessEqualMaskedUint8x32
-       OpLessEqualMaskedUint8x64
-       OpLessEqualMaskedUint16x8
-       OpLessEqualMaskedUint16x16
-       OpLessEqualMaskedUint16x32
-       OpLessEqualMaskedUint32x4
-       OpLessEqualMaskedUint32x8
-       OpLessEqualMaskedUint32x16
-       OpLessEqualMaskedUint64x2
-       OpLessEqualMaskedUint64x4
-       OpLessEqualMaskedUint64x8
        OpLessEqualUint8x64
        OpLessEqualUint16x32
        OpLessEqualUint32x16
@@ -5302,36 +5032,6 @@ const (
        OpLessInt16x32
        OpLessInt32x16
        OpLessInt64x8
-       OpLessMaskedFloat32x4
-       OpLessMaskedFloat32x8
-       OpLessMaskedFloat32x16
-       OpLessMaskedFloat64x2
-       OpLessMaskedFloat64x4
-       OpLessMaskedFloat64x8
-       OpLessMaskedInt8x16
-       OpLessMaskedInt8x32
-       OpLessMaskedInt8x64
-       OpLessMaskedInt16x8
-       OpLessMaskedInt16x16
-       OpLessMaskedInt16x32
-       OpLessMaskedInt32x4
-       OpLessMaskedInt32x8
-       OpLessMaskedInt32x16
-       OpLessMaskedInt64x2
-       OpLessMaskedInt64x4
-       OpLessMaskedInt64x8
-       OpLessMaskedUint8x16
-       OpLessMaskedUint8x32
-       OpLessMaskedUint8x64
-       OpLessMaskedUint16x8
-       OpLessMaskedUint16x16
-       OpLessMaskedUint16x32
-       OpLessMaskedUint32x4
-       OpLessMaskedUint32x8
-       OpLessMaskedUint32x16
-       OpLessMaskedUint64x2
-       OpLessMaskedUint64x4
-       OpLessMaskedUint64x8
        OpLessUint8x64
        OpLessUint16x32
        OpLessUint32x16
@@ -5354,36 +5054,6 @@ const (
        OpMaxInt64x2
        OpMaxInt64x4
        OpMaxInt64x8
-       OpMaxMaskedFloat32x4
-       OpMaxMaskedFloat32x8
-       OpMaxMaskedFloat32x16
-       OpMaxMaskedFloat64x2
-       OpMaxMaskedFloat64x4
-       OpMaxMaskedFloat64x8
-       OpMaxMaskedInt8x16
-       OpMaxMaskedInt8x32
-       OpMaxMaskedInt8x64
-       OpMaxMaskedInt16x8
-       OpMaxMaskedInt16x16
-       OpMaxMaskedInt16x32
-       OpMaxMaskedInt32x4
-       OpMaxMaskedInt32x8
-       OpMaxMaskedInt32x16
-       OpMaxMaskedInt64x2
-       OpMaxMaskedInt64x4
-       OpMaxMaskedInt64x8
-       OpMaxMaskedUint8x16
-       OpMaxMaskedUint8x32
-       OpMaxMaskedUint8x64
-       OpMaxMaskedUint16x8
-       OpMaxMaskedUint16x16
-       OpMaxMaskedUint16x32
-       OpMaxMaskedUint32x4
-       OpMaxMaskedUint32x8
-       OpMaxMaskedUint32x16
-       OpMaxMaskedUint64x2
-       OpMaxMaskedUint64x4
-       OpMaxMaskedUint64x8
        OpMaxUint8x16
        OpMaxUint8x32
        OpMaxUint8x64
@@ -5414,36 +5084,6 @@ const (
        OpMinInt64x2
        OpMinInt64x4
        OpMinInt64x8
-       OpMinMaskedFloat32x4
-       OpMinMaskedFloat32x8
-       OpMinMaskedFloat32x16
-       OpMinMaskedFloat64x2
-       OpMinMaskedFloat64x4
-       OpMinMaskedFloat64x8
-       OpMinMaskedInt8x16
-       OpMinMaskedInt8x32
-       OpMinMaskedInt8x64
-       OpMinMaskedInt16x8
-       OpMinMaskedInt16x16
-       OpMinMaskedInt16x32
-       OpMinMaskedInt32x4
-       OpMinMaskedInt32x8
-       OpMinMaskedInt32x16
-       OpMinMaskedInt64x2
-       OpMinMaskedInt64x4
-       OpMinMaskedInt64x8
-       OpMinMaskedUint8x16
-       OpMinMaskedUint8x32
-       OpMinMaskedUint8x64
-       OpMinMaskedUint16x8
-       OpMinMaskedUint16x16
-       OpMinMaskedUint16x32
-       OpMinMaskedUint32x4
-       OpMinMaskedUint32x8
-       OpMinMaskedUint32x16
-       OpMinMaskedUint64x2
-       OpMinMaskedUint64x4
-       OpMinMaskedUint64x8
        OpMinUint8x16
        OpMinUint8x32
        OpMinUint8x64
@@ -5462,24 +5102,12 @@ const (
        OpMulAddFloat64x2
        OpMulAddFloat64x4
        OpMulAddFloat64x8
-       OpMulAddMaskedFloat32x4
-       OpMulAddMaskedFloat32x8
-       OpMulAddMaskedFloat32x16
-       OpMulAddMaskedFloat64x2
-       OpMulAddMaskedFloat64x4
-       OpMulAddMaskedFloat64x8
        OpMulAddSubFloat32x4
        OpMulAddSubFloat32x8
        OpMulAddSubFloat32x16
        OpMulAddSubFloat64x2
        OpMulAddSubFloat64x4
        OpMulAddSubFloat64x8
-       OpMulAddSubMaskedFloat32x4
-       OpMulAddSubMaskedFloat32x8
-       OpMulAddSubMaskedFloat32x16
-       OpMulAddSubMaskedFloat64x2
-       OpMulAddSubMaskedFloat64x4
-       OpMulAddSubMaskedFloat64x8
        OpMulEvenWidenInt32x4
        OpMulEvenWidenInt32x8
        OpMulEvenWidenUint32x4
@@ -5493,12 +5121,6 @@ const (
        OpMulHighInt16x8
        OpMulHighInt16x16
        OpMulHighInt16x32
-       OpMulHighMaskedInt16x8
-       OpMulHighMaskedInt16x16
-       OpMulHighMaskedInt16x32
-       OpMulHighMaskedUint16x8
-       OpMulHighMaskedUint16x16
-       OpMulHighMaskedUint16x32
        OpMulHighUint16x8
        OpMulHighUint16x16
        OpMulHighUint16x32
@@ -5511,42 +5133,12 @@ const (
        OpMulInt64x2
        OpMulInt64x4
        OpMulInt64x8
-       OpMulMaskedFloat32x4
-       OpMulMaskedFloat32x8
-       OpMulMaskedFloat32x16
-       OpMulMaskedFloat64x2
-       OpMulMaskedFloat64x4
-       OpMulMaskedFloat64x8
-       OpMulMaskedInt16x8
-       OpMulMaskedInt16x16
-       OpMulMaskedInt16x32
-       OpMulMaskedInt32x4
-       OpMulMaskedInt32x8
-       OpMulMaskedInt32x16
-       OpMulMaskedInt64x2
-       OpMulMaskedInt64x4
-       OpMulMaskedInt64x8
-       OpMulMaskedUint16x8
-       OpMulMaskedUint16x16
-       OpMulMaskedUint16x32
-       OpMulMaskedUint32x4
-       OpMulMaskedUint32x8
-       OpMulMaskedUint32x16
-       OpMulMaskedUint64x2
-       OpMulMaskedUint64x4
-       OpMulMaskedUint64x8
        OpMulSubAddFloat32x4
        OpMulSubAddFloat32x8
        OpMulSubAddFloat32x16
        OpMulSubAddFloat64x2
        OpMulSubAddFloat64x4
        OpMulSubAddFloat64x8
-       OpMulSubAddMaskedFloat32x4
-       OpMulSubAddMaskedFloat32x8
-       OpMulSubAddMaskedFloat32x16
-       OpMulSubAddMaskedFloat64x2
-       OpMulSubAddMaskedFloat64x4
-       OpMulSubAddMaskedFloat64x8
        OpMulUint16x8
        OpMulUint16x16
        OpMulUint16x32
@@ -5566,36 +5158,6 @@ const (
        OpNotEqualInt16x32
        OpNotEqualInt32x16
        OpNotEqualInt64x8
-       OpNotEqualMaskedFloat32x4
-       OpNotEqualMaskedFloat32x8
-       OpNotEqualMaskedFloat32x16
-       OpNotEqualMaskedFloat64x2
-       OpNotEqualMaskedFloat64x4
-       OpNotEqualMaskedFloat64x8
-       OpNotEqualMaskedInt8x16
-       OpNotEqualMaskedInt8x32
-       OpNotEqualMaskedInt8x64
-       OpNotEqualMaskedInt16x8
-       OpNotEqualMaskedInt16x16
-       OpNotEqualMaskedInt16x32
-       OpNotEqualMaskedInt32x4
-       OpNotEqualMaskedInt32x8
-       OpNotEqualMaskedInt32x16
-       OpNotEqualMaskedInt64x2
-       OpNotEqualMaskedInt64x4
-       OpNotEqualMaskedInt64x8
-       OpNotEqualMaskedUint8x16
-       OpNotEqualMaskedUint8x32
-       OpNotEqualMaskedUint8x64
-       OpNotEqualMaskedUint16x8
-       OpNotEqualMaskedUint16x16
-       OpNotEqualMaskedUint16x32
-       OpNotEqualMaskedUint32x4
-       OpNotEqualMaskedUint32x8
-       OpNotEqualMaskedUint32x16
-       OpNotEqualMaskedUint64x2
-       OpNotEqualMaskedUint64x4
-       OpNotEqualMaskedUint64x8
        OpNotEqualUint8x64
        OpNotEqualUint16x32
        OpNotEqualUint32x16
@@ -5612,30 +5174,6 @@ const (
        OpOnesCountInt64x2
        OpOnesCountInt64x4
        OpOnesCountInt64x8
-       OpOnesCountMaskedInt8x16
-       OpOnesCountMaskedInt8x32
-       OpOnesCountMaskedInt8x64
-       OpOnesCountMaskedInt16x8
-       OpOnesCountMaskedInt16x16
-       OpOnesCountMaskedInt16x32
-       OpOnesCountMaskedInt32x4
-       OpOnesCountMaskedInt32x8
-       OpOnesCountMaskedInt32x16
-       OpOnesCountMaskedInt64x2
-       OpOnesCountMaskedInt64x4
-       OpOnesCountMaskedInt64x8
-       OpOnesCountMaskedUint8x16
-       OpOnesCountMaskedUint8x32
-       OpOnesCountMaskedUint8x64
-       OpOnesCountMaskedUint16x8
-       OpOnesCountMaskedUint16x16
-       OpOnesCountMaskedUint16x32
-       OpOnesCountMaskedUint32x4
-       OpOnesCountMaskedUint32x8
-       OpOnesCountMaskedUint32x16
-       OpOnesCountMaskedUint64x2
-       OpOnesCountMaskedUint64x4
-       OpOnesCountMaskedUint64x8
        OpOnesCountUint8x16
        OpOnesCountUint8x32
        OpOnesCountUint8x64
@@ -5660,18 +5198,6 @@ const (
        OpOrInt64x2
        OpOrInt64x4
        OpOrInt64x8
-       OpOrMaskedInt32x4
-       OpOrMaskedInt32x8
-       OpOrMaskedInt32x16
-       OpOrMaskedInt64x2
-       OpOrMaskedInt64x4
-       OpOrMaskedInt64x8
-       OpOrMaskedUint32x4
-       OpOrMaskedUint32x8
-       OpOrMaskedUint32x16
-       OpOrMaskedUint64x2
-       OpOrMaskedUint64x4
-       OpOrMaskedUint64x8
        OpOrUint8x16
        OpOrUint8x32
        OpOrUint8x64
@@ -5702,36 +5228,6 @@ const (
        OpPermute2Int64x2
        OpPermute2Int64x4
        OpPermute2Int64x8
-       OpPermute2MaskedFloat32x4
-       OpPermute2MaskedFloat32x8
-       OpPermute2MaskedFloat32x16
-       OpPermute2MaskedFloat64x2
-       OpPermute2MaskedFloat64x4
-       OpPermute2MaskedFloat64x8
-       OpPermute2MaskedInt8x16
-       OpPermute2MaskedInt8x32
-       OpPermute2MaskedInt8x64
-       OpPermute2MaskedInt16x8
-       OpPermute2MaskedInt16x16
-       OpPermute2MaskedInt16x32
-       OpPermute2MaskedInt32x4
-       OpPermute2MaskedInt32x8
-       OpPermute2MaskedInt32x16
-       OpPermute2MaskedInt64x2
-       OpPermute2MaskedInt64x4
-       OpPermute2MaskedInt64x8
-       OpPermute2MaskedUint8x16
-       OpPermute2MaskedUint8x32
-       OpPermute2MaskedUint8x64
-       OpPermute2MaskedUint16x8
-       OpPermute2MaskedUint16x16
-       OpPermute2MaskedUint16x32
-       OpPermute2MaskedUint32x4
-       OpPermute2MaskedUint32x8
-       OpPermute2MaskedUint32x16
-       OpPermute2MaskedUint64x2
-       OpPermute2MaskedUint64x4
-       OpPermute2MaskedUint64x8
        OpPermute2Uint8x16
        OpPermute2Uint8x32
        OpPermute2Uint8x64
@@ -5758,30 +5254,6 @@ const (
        OpPermuteInt32x16
        OpPermuteInt64x4
        OpPermuteInt64x8
-       OpPermuteMaskedFloat32x8
-       OpPermuteMaskedFloat32x16
-       OpPermuteMaskedFloat64x4
-       OpPermuteMaskedFloat64x8
-       OpPermuteMaskedInt8x16
-       OpPermuteMaskedInt8x32
-       OpPermuteMaskedInt8x64
-       OpPermuteMaskedInt16x8
-       OpPermuteMaskedInt16x16
-       OpPermuteMaskedInt16x32
-       OpPermuteMaskedInt32x8
-       OpPermuteMaskedInt32x16
-       OpPermuteMaskedInt64x4
-       OpPermuteMaskedInt64x8
-       OpPermuteMaskedUint8x16
-       OpPermuteMaskedUint8x32
-       OpPermuteMaskedUint8x64
-       OpPermuteMaskedUint16x8
-       OpPermuteMaskedUint16x16
-       OpPermuteMaskedUint16x32
-       OpPermuteMaskedUint32x8
-       OpPermuteMaskedUint32x16
-       OpPermuteMaskedUint64x4
-       OpPermuteMaskedUint64x8
        OpPermuteUint8x16
        OpPermuteUint8x32
        OpPermuteUint8x64
@@ -5798,42 +5270,18 @@ const (
        OpReciprocalFloat64x2
        OpReciprocalFloat64x4
        OpReciprocalFloat64x8
-       OpReciprocalMaskedFloat32x4
-       OpReciprocalMaskedFloat32x8
-       OpReciprocalMaskedFloat32x16
-       OpReciprocalMaskedFloat64x2
-       OpReciprocalMaskedFloat64x4
-       OpReciprocalMaskedFloat64x8
        OpReciprocalSqrtFloat32x4
        OpReciprocalSqrtFloat32x8
        OpReciprocalSqrtFloat32x16
        OpReciprocalSqrtFloat64x2
        OpReciprocalSqrtFloat64x4
        OpReciprocalSqrtFloat64x8
-       OpReciprocalSqrtMaskedFloat32x4
-       OpReciprocalSqrtMaskedFloat32x8
-       OpReciprocalSqrtMaskedFloat32x16
-       OpReciprocalSqrtMaskedFloat64x2
-       OpReciprocalSqrtMaskedFloat64x4
-       OpReciprocalSqrtMaskedFloat64x8
        OpRotateLeftInt32x4
        OpRotateLeftInt32x8
        OpRotateLeftInt32x16
        OpRotateLeftInt64x2
        OpRotateLeftInt64x4
        OpRotateLeftInt64x8
-       OpRotateLeftMaskedInt32x4
-       OpRotateLeftMaskedInt32x8
-       OpRotateLeftMaskedInt32x16
-       OpRotateLeftMaskedInt64x2
-       OpRotateLeftMaskedInt64x4
-       OpRotateLeftMaskedInt64x8
-       OpRotateLeftMaskedUint32x4
-       OpRotateLeftMaskedUint32x8
-       OpRotateLeftMaskedUint32x16
-       OpRotateLeftMaskedUint64x2
-       OpRotateLeftMaskedUint64x4
-       OpRotateLeftMaskedUint64x8
        OpRotateLeftUint32x4
        OpRotateLeftUint32x8
        OpRotateLeftUint32x16
@@ -5846,18 +5294,6 @@ const (
        OpRotateRightInt64x2
        OpRotateRightInt64x4
        OpRotateRightInt64x8
-       OpRotateRightMaskedInt32x4
-       OpRotateRightMaskedInt32x8
-       OpRotateRightMaskedInt32x16
-       OpRotateRightMaskedInt64x2
-       OpRotateRightMaskedInt64x4
-       OpRotateRightMaskedInt64x8
-       OpRotateRightMaskedUint32x4
-       OpRotateRightMaskedUint32x8
-       OpRotateRightMaskedUint32x16
-       OpRotateRightMaskedUint64x2
-       OpRotateRightMaskedUint64x4
-       OpRotateRightMaskedUint64x8
        OpRotateRightUint32x4
        OpRotateRightUint32x8
        OpRotateRightUint32x16
@@ -5874,12 +5310,6 @@ const (
        OpScaleFloat64x2
        OpScaleFloat64x4
        OpScaleFloat64x8
-       OpScaleMaskedFloat32x4
-       OpScaleMaskedFloat32x8
-       OpScaleMaskedFloat32x16
-       OpScaleMaskedFloat64x2
-       OpScaleMaskedFloat64x4
-       OpScaleMaskedFloat64x8
        OpSetHiFloat32x8
        OpSetHiFloat32x16
        OpSetHiFloat64x4
@@ -5929,24 +5359,6 @@ const (
        OpShiftAllLeftInt64x2
        OpShiftAllLeftInt64x4
        OpShiftAllLeftInt64x8
-       OpShiftAllLeftMaskedInt16x8
-       OpShiftAllLeftMaskedInt16x16
-       OpShiftAllLeftMaskedInt16x32
-       OpShiftAllLeftMaskedInt32x4
-       OpShiftAllLeftMaskedInt32x8
-       OpShiftAllLeftMaskedInt32x16
-       OpShiftAllLeftMaskedInt64x2
-       OpShiftAllLeftMaskedInt64x4
-       OpShiftAllLeftMaskedInt64x8
-       OpShiftAllLeftMaskedUint16x8
-       OpShiftAllLeftMaskedUint16x16
-       OpShiftAllLeftMaskedUint16x32
-       OpShiftAllLeftMaskedUint32x4
-       OpShiftAllLeftMaskedUint32x8
-       OpShiftAllLeftMaskedUint32x16
-       OpShiftAllLeftMaskedUint64x2
-       OpShiftAllLeftMaskedUint64x4
-       OpShiftAllLeftMaskedUint64x8
        OpShiftAllLeftUint16x8
        OpShiftAllLeftUint16x16
        OpShiftAllLeftUint16x32
@@ -5965,24 +5377,6 @@ const (
        OpShiftAllRightInt64x2
        OpShiftAllRightInt64x4
        OpShiftAllRightInt64x8
-       OpShiftAllRightMaskedInt16x8
-       OpShiftAllRightMaskedInt16x16
-       OpShiftAllRightMaskedInt16x32
-       OpShiftAllRightMaskedInt32x4
-       OpShiftAllRightMaskedInt32x8
-       OpShiftAllRightMaskedInt32x16
-       OpShiftAllRightMaskedInt64x2
-       OpShiftAllRightMaskedInt64x4
-       OpShiftAllRightMaskedInt64x8
-       OpShiftAllRightMaskedUint16x8
-       OpShiftAllRightMaskedUint16x16
-       OpShiftAllRightMaskedUint16x32
-       OpShiftAllRightMaskedUint32x4
-       OpShiftAllRightMaskedUint32x8
-       OpShiftAllRightMaskedUint32x16
-       OpShiftAllRightMaskedUint64x2
-       OpShiftAllRightMaskedUint64x4
-       OpShiftAllRightMaskedUint64x8
        OpShiftAllRightUint16x8
        OpShiftAllRightUint16x16
        OpShiftAllRightUint16x32
@@ -6001,24 +5395,6 @@ const (
        OpShiftLeftConcatInt64x2
        OpShiftLeftConcatInt64x4
        OpShiftLeftConcatInt64x8
-       OpShiftLeftConcatMaskedInt16x8
-       OpShiftLeftConcatMaskedInt16x16
-       OpShiftLeftConcatMaskedInt16x32
-       OpShiftLeftConcatMaskedInt32x4
-       OpShiftLeftConcatMaskedInt32x8
-       OpShiftLeftConcatMaskedInt32x16
-       OpShiftLeftConcatMaskedInt64x2
-       OpShiftLeftConcatMaskedInt64x4
-       OpShiftLeftConcatMaskedInt64x8
-       OpShiftLeftConcatMaskedUint16x8
-       OpShiftLeftConcatMaskedUint16x16
-       OpShiftLeftConcatMaskedUint16x32
-       OpShiftLeftConcatMaskedUint32x4
-       OpShiftLeftConcatMaskedUint32x8
-       OpShiftLeftConcatMaskedUint32x16
-       OpShiftLeftConcatMaskedUint64x2
-       OpShiftLeftConcatMaskedUint64x4
-       OpShiftLeftConcatMaskedUint64x8
        OpShiftLeftConcatUint16x8
        OpShiftLeftConcatUint16x16
        OpShiftLeftConcatUint16x32
@@ -6037,24 +5413,6 @@ const (
        OpShiftLeftInt64x2
        OpShiftLeftInt64x4
        OpShiftLeftInt64x8
-       OpShiftLeftMaskedInt16x8
-       OpShiftLeftMaskedInt16x16
-       OpShiftLeftMaskedInt16x32
-       OpShiftLeftMaskedInt32x4
-       OpShiftLeftMaskedInt32x8
-       OpShiftLeftMaskedInt32x16
-       OpShiftLeftMaskedInt64x2
-       OpShiftLeftMaskedInt64x4
-       OpShiftLeftMaskedInt64x8
-       OpShiftLeftMaskedUint16x8
-       OpShiftLeftMaskedUint16x16
-       OpShiftLeftMaskedUint16x32
-       OpShiftLeftMaskedUint32x4
-       OpShiftLeftMaskedUint32x8
-       OpShiftLeftMaskedUint32x16
-       OpShiftLeftMaskedUint64x2
-       OpShiftLeftMaskedUint64x4
-       OpShiftLeftMaskedUint64x8
        OpShiftLeftUint16x8
        OpShiftLeftUint16x16
        OpShiftLeftUint16x32
@@ -6073,24 +5431,6 @@ const (
        OpShiftRightConcatInt64x2
        OpShiftRightConcatInt64x4
        OpShiftRightConcatInt64x8
-       OpShiftRightConcatMaskedInt16x8
-       OpShiftRightConcatMaskedInt16x16
-       OpShiftRightConcatMaskedInt16x32
-       OpShiftRightConcatMaskedInt32x4
-       OpShiftRightConcatMaskedInt32x8
-       OpShiftRightConcatMaskedInt32x16
-       OpShiftRightConcatMaskedInt64x2
-       OpShiftRightConcatMaskedInt64x4
-       OpShiftRightConcatMaskedInt64x8
-       OpShiftRightConcatMaskedUint16x8
-       OpShiftRightConcatMaskedUint16x16
-       OpShiftRightConcatMaskedUint16x32
-       OpShiftRightConcatMaskedUint32x4
-       OpShiftRightConcatMaskedUint32x8
-       OpShiftRightConcatMaskedUint32x16
-       OpShiftRightConcatMaskedUint64x2
-       OpShiftRightConcatMaskedUint64x4
-       OpShiftRightConcatMaskedUint64x8
        OpShiftRightConcatUint16x8
        OpShiftRightConcatUint16x16
        OpShiftRightConcatUint16x32
@@ -6109,24 +5449,6 @@ const (
        OpShiftRightInt64x2
        OpShiftRightInt64x4
        OpShiftRightInt64x8
-       OpShiftRightMaskedInt16x8
-       OpShiftRightMaskedInt16x16
-       OpShiftRightMaskedInt16x32
-       OpShiftRightMaskedInt32x4
-       OpShiftRightMaskedInt32x8
-       OpShiftRightMaskedInt32x16
-       OpShiftRightMaskedInt64x2
-       OpShiftRightMaskedInt64x4
-       OpShiftRightMaskedInt64x8
-       OpShiftRightMaskedUint16x8
-       OpShiftRightMaskedUint16x16
-       OpShiftRightMaskedUint16x32
-       OpShiftRightMaskedUint32x4
-       OpShiftRightMaskedUint32x8
-       OpShiftRightMaskedUint32x16
-       OpShiftRightMaskedUint64x2
-       OpShiftRightMaskedUint64x4
-       OpShiftRightMaskedUint64x8
        OpShiftRightUint16x8
        OpShiftRightUint16x16
        OpShiftRightUint16x32
@@ -6142,12 +5464,6 @@ const (
        OpSqrtFloat64x2
        OpSqrtFloat64x4
        OpSqrtFloat64x8
-       OpSqrtMaskedFloat32x4
-       OpSqrtMaskedFloat32x8
-       OpSqrtMaskedFloat32x16
-       OpSqrtMaskedFloat64x2
-       OpSqrtMaskedFloat64x4
-       OpSqrtMaskedFloat64x8
        OpSubFloat32x4
        OpSubFloat32x8
        OpSubFloat32x16
@@ -6166,36 +5482,6 @@ const (
        OpSubInt64x2
        OpSubInt64x4
        OpSubInt64x8
-       OpSubMaskedFloat32x4
-       OpSubMaskedFloat32x8
-       OpSubMaskedFloat32x16
-       OpSubMaskedFloat64x2
-       OpSubMaskedFloat64x4
-       OpSubMaskedFloat64x8
-       OpSubMaskedInt8x16
-       OpSubMaskedInt8x32
-       OpSubMaskedInt8x64
-       OpSubMaskedInt16x8
-       OpSubMaskedInt16x16
-       OpSubMaskedInt16x32
-       OpSubMaskedInt32x4
-       OpSubMaskedInt32x8
-       OpSubMaskedInt32x16
-       OpSubMaskedInt64x2
-       OpSubMaskedInt64x4
-       OpSubMaskedInt64x8
-       OpSubMaskedUint8x16
-       OpSubMaskedUint8x32
-       OpSubMaskedUint8x64
-       OpSubMaskedUint16x8
-       OpSubMaskedUint16x16
-       OpSubMaskedUint16x32
-       OpSubMaskedUint32x4
-       OpSubMaskedUint32x8
-       OpSubMaskedUint32x16
-       OpSubMaskedUint64x2
-       OpSubMaskedUint64x4
-       OpSubMaskedUint64x8
        OpSubPairsFloat32x4
        OpSubPairsFloat32x8
        OpSubPairsFloat64x2
@@ -6216,18 +5502,6 @@ const (
        OpSubSaturatedInt16x8
        OpSubSaturatedInt16x16
        OpSubSaturatedInt16x32
-       OpSubSaturatedMaskedInt8x16
-       OpSubSaturatedMaskedInt8x32
-       OpSubSaturatedMaskedInt8x64
-       OpSubSaturatedMaskedInt16x8
-       OpSubSaturatedMaskedInt16x16
-       OpSubSaturatedMaskedInt16x32
-       OpSubSaturatedMaskedUint8x16
-       OpSubSaturatedMaskedUint8x32
-       OpSubSaturatedMaskedUint8x64
-       OpSubSaturatedMaskedUint16x8
-       OpSubSaturatedMaskedUint16x16
-       OpSubSaturatedMaskedUint16x32
        OpSubSaturatedUint8x16
        OpSubSaturatedUint8x32
        OpSubSaturatedUint8x64
@@ -6262,18 +5536,6 @@ const (
        OpXorInt64x2
        OpXorInt64x4
        OpXorInt64x8
-       OpXorMaskedInt32x4
-       OpXorMaskedInt32x8
-       OpXorMaskedInt32x16
-       OpXorMaskedInt64x2
-       OpXorMaskedInt64x4
-       OpXorMaskedInt64x8
-       OpXorMaskedUint32x4
-       OpXorMaskedUint32x8
-       OpXorMaskedUint32x16
-       OpXorMaskedUint64x2
-       OpXorMaskedUint64x4
-       OpXorMaskedUint64x8
        OpXorUint8x16
        OpXorUint8x32
        OpXorUint8x64
@@ -6298,57 +5560,27 @@ const (
        OpCeilScaledFloat64x2
        OpCeilScaledFloat64x4
        OpCeilScaledFloat64x8
-       OpCeilScaledMaskedFloat32x4
-       OpCeilScaledMaskedFloat32x8
-       OpCeilScaledMaskedFloat32x16
-       OpCeilScaledMaskedFloat64x2
-       OpCeilScaledMaskedFloat64x4
-       OpCeilScaledMaskedFloat64x8
        OpCeilScaledResidueFloat32x4
        OpCeilScaledResidueFloat32x8
        OpCeilScaledResidueFloat32x16
        OpCeilScaledResidueFloat64x2
        OpCeilScaledResidueFloat64x4
        OpCeilScaledResidueFloat64x8
-       OpCeilScaledResidueMaskedFloat32x4
-       OpCeilScaledResidueMaskedFloat32x8
-       OpCeilScaledResidueMaskedFloat32x16
-       OpCeilScaledResidueMaskedFloat64x2
-       OpCeilScaledResidueMaskedFloat64x4
-       OpCeilScaledResidueMaskedFloat64x8
        OpFloorScaledFloat32x4
        OpFloorScaledFloat32x8
        OpFloorScaledFloat32x16
        OpFloorScaledFloat64x2
        OpFloorScaledFloat64x4
        OpFloorScaledFloat64x8
-       OpFloorScaledMaskedFloat32x4
-       OpFloorScaledMaskedFloat32x8
-       OpFloorScaledMaskedFloat32x16
-       OpFloorScaledMaskedFloat64x2
-       OpFloorScaledMaskedFloat64x4
-       OpFloorScaledMaskedFloat64x8
        OpFloorScaledResidueFloat32x4
        OpFloorScaledResidueFloat32x8
        OpFloorScaledResidueFloat32x16
        OpFloorScaledResidueFloat64x2
        OpFloorScaledResidueFloat64x4
        OpFloorScaledResidueFloat64x8
-       OpFloorScaledResidueMaskedFloat32x4
-       OpFloorScaledResidueMaskedFloat32x8
-       OpFloorScaledResidueMaskedFloat32x16
-       OpFloorScaledResidueMaskedFloat64x2
-       OpFloorScaledResidueMaskedFloat64x4
-       OpFloorScaledResidueMaskedFloat64x8
-       OpGaloisFieldAffineTransformInverseMaskedUint8x16
-       OpGaloisFieldAffineTransformInverseMaskedUint8x32
-       OpGaloisFieldAffineTransformInverseMaskedUint8x64
        OpGaloisFieldAffineTransformInverseUint8x16
        OpGaloisFieldAffineTransformInverseUint8x32
        OpGaloisFieldAffineTransformInverseUint8x64
-       OpGaloisFieldAffineTransformMaskedUint8x16
-       OpGaloisFieldAffineTransformMaskedUint8x32
-       OpGaloisFieldAffineTransformMaskedUint8x64
        OpGaloisFieldAffineTransformUint8x16
        OpGaloisFieldAffineTransformUint8x32
        OpGaloisFieldAffineTransformUint8x64
@@ -6368,18 +5600,6 @@ const (
        OpRotateAllLeftInt64x2
        OpRotateAllLeftInt64x4
        OpRotateAllLeftInt64x8
-       OpRotateAllLeftMaskedInt32x4
-       OpRotateAllLeftMaskedInt32x8
-       OpRotateAllLeftMaskedInt32x16
-       OpRotateAllLeftMaskedInt64x2
-       OpRotateAllLeftMaskedInt64x4
-       OpRotateAllLeftMaskedInt64x8
-       OpRotateAllLeftMaskedUint32x4
-       OpRotateAllLeftMaskedUint32x8
-       OpRotateAllLeftMaskedUint32x16
-       OpRotateAllLeftMaskedUint64x2
-       OpRotateAllLeftMaskedUint64x4
-       OpRotateAllLeftMaskedUint64x8
        OpRotateAllLeftUint32x4
        OpRotateAllLeftUint32x8
        OpRotateAllLeftUint32x16
@@ -6392,18 +5612,6 @@ const (
        OpRotateAllRightInt64x2
        OpRotateAllRightInt64x4
        OpRotateAllRightInt64x8
-       OpRotateAllRightMaskedInt32x4
-       OpRotateAllRightMaskedInt32x8
-       OpRotateAllRightMaskedInt32x16
-       OpRotateAllRightMaskedInt64x2
-       OpRotateAllRightMaskedInt64x4
-       OpRotateAllRightMaskedInt64x8
-       OpRotateAllRightMaskedUint32x4
-       OpRotateAllRightMaskedUint32x8
-       OpRotateAllRightMaskedUint32x16
-       OpRotateAllRightMaskedUint64x2
-       OpRotateAllRightMaskedUint64x4
-       OpRotateAllRightMaskedUint64x8
        OpRotateAllRightUint32x4
        OpRotateAllRightUint32x8
        OpRotateAllRightUint32x16
@@ -6416,24 +5624,12 @@ const (
        OpRoundToEvenScaledFloat64x2
        OpRoundToEvenScaledFloat64x4
        OpRoundToEvenScaledFloat64x8
-       OpRoundToEvenScaledMaskedFloat32x4
-       OpRoundToEvenScaledMaskedFloat32x8
-       OpRoundToEvenScaledMaskedFloat32x16
-       OpRoundToEvenScaledMaskedFloat64x2
-       OpRoundToEvenScaledMaskedFloat64x4
-       OpRoundToEvenScaledMaskedFloat64x8
        OpRoundToEvenScaledResidueFloat32x4
        OpRoundToEvenScaledResidueFloat32x8
        OpRoundToEvenScaledResidueFloat32x16
        OpRoundToEvenScaledResidueFloat64x2
        OpRoundToEvenScaledResidueFloat64x4
        OpRoundToEvenScaledResidueFloat64x8
-       OpRoundToEvenScaledResidueMaskedFloat32x4
-       OpRoundToEvenScaledResidueMaskedFloat32x8
-       OpRoundToEvenScaledResidueMaskedFloat32x16
-       OpRoundToEvenScaledResidueMaskedFloat64x2
-       OpRoundToEvenScaledResidueMaskedFloat64x4
-       OpRoundToEvenScaledResidueMaskedFloat64x8
        OpSetElemFloat32x4
        OpSetElemFloat64x2
        OpSetElemInt8x16
@@ -6453,24 +5649,6 @@ const (
        OpShiftAllLeftConcatInt64x2
        OpShiftAllLeftConcatInt64x4
        OpShiftAllLeftConcatInt64x8
-       OpShiftAllLeftConcatMaskedInt16x8
-       OpShiftAllLeftConcatMaskedInt16x16
-       OpShiftAllLeftConcatMaskedInt16x32
-       OpShiftAllLeftConcatMaskedInt32x4
-       OpShiftAllLeftConcatMaskedInt32x8
-       OpShiftAllLeftConcatMaskedInt32x16
-       OpShiftAllLeftConcatMaskedInt64x2
-       OpShiftAllLeftConcatMaskedInt64x4
-       OpShiftAllLeftConcatMaskedInt64x8
-       OpShiftAllLeftConcatMaskedUint16x8
-       OpShiftAllLeftConcatMaskedUint16x16
-       OpShiftAllLeftConcatMaskedUint16x32
-       OpShiftAllLeftConcatMaskedUint32x4
-       OpShiftAllLeftConcatMaskedUint32x8
-       OpShiftAllLeftConcatMaskedUint32x16
-       OpShiftAllLeftConcatMaskedUint64x2
-       OpShiftAllLeftConcatMaskedUint64x4
-       OpShiftAllLeftConcatMaskedUint64x8
        OpShiftAllLeftConcatUint16x8
        OpShiftAllLeftConcatUint16x16
        OpShiftAllLeftConcatUint16x32
@@ -6489,24 +5667,6 @@ const (
        OpShiftAllRightConcatInt64x2
        OpShiftAllRightConcatInt64x4
        OpShiftAllRightConcatInt64x8
-       OpShiftAllRightConcatMaskedInt16x8
-       OpShiftAllRightConcatMaskedInt16x16
-       OpShiftAllRightConcatMaskedInt16x32
-       OpShiftAllRightConcatMaskedInt32x4
-       OpShiftAllRightConcatMaskedInt32x8
-       OpShiftAllRightConcatMaskedInt32x16
-       OpShiftAllRightConcatMaskedInt64x2
-       OpShiftAllRightConcatMaskedInt64x4
-       OpShiftAllRightConcatMaskedInt64x8
-       OpShiftAllRightConcatMaskedUint16x8
-       OpShiftAllRightConcatMaskedUint16x16
-       OpShiftAllRightConcatMaskedUint16x32
-       OpShiftAllRightConcatMaskedUint32x4
-       OpShiftAllRightConcatMaskedUint32x8
-       OpShiftAllRightConcatMaskedUint32x16
-       OpShiftAllRightConcatMaskedUint64x2
-       OpShiftAllRightConcatMaskedUint64x4
-       OpShiftAllRightConcatMaskedUint64x8
        OpShiftAllRightConcatUint16x8
        OpShiftAllRightConcatUint16x16
        OpShiftAllRightConcatUint16x32
@@ -6522,24 +5682,12 @@ const (
        OpTruncScaledFloat64x2
        OpTruncScaledFloat64x4
        OpTruncScaledFloat64x8
-       OpTruncScaledMaskedFloat32x4
-       OpTruncScaledMaskedFloat32x8
-       OpTruncScaledMaskedFloat32x16
-       OpTruncScaledMaskedFloat64x2
-       OpTruncScaledMaskedFloat64x4
-       OpTruncScaledMaskedFloat64x8
        OpTruncScaledResidueFloat32x4
        OpTruncScaledResidueFloat32x8
        OpTruncScaledResidueFloat32x16
        OpTruncScaledResidueFloat64x2
        OpTruncScaledResidueFloat64x4
        OpTruncScaledResidueFloat64x8
-       OpTruncScaledResidueMaskedFloat32x4
-       OpTruncScaledResidueMaskedFloat32x8
-       OpTruncScaledResidueMaskedFloat32x16
-       OpTruncScaledResidueMaskedFloat64x2
-       OpTruncScaledResidueMaskedFloat64x4
-       OpTruncScaledResidueMaskedFloat64x8
 )
 
 var opcodeTable = [...]opInfo{
@@ -63838,66 +62986,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "AbsMaskedInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt64x8",
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "AddDotProdPairsSaturatedInt32x4",
                argLen:  3,
@@ -63913,21 +63001,6 @@ var opcodeTable = [...]opInfo{
                argLen:  3,
                generic: true,
        },
-       {
-               name:    "AddDotProdPairsSaturatedMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdPairsSaturatedMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdPairsSaturatedMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
        {
                name:    "AddDotProdQuadrupleInt32x4",
                argLen:  3,
@@ -63943,21 +63016,6 @@ var opcodeTable = [...]opInfo{
                argLen:  3,
                generic: true,
        },
-       {
-               name:    "AddDotProdQuadrupleMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
        {
                name:    "AddDotProdQuadrupleSaturatedInt32x4",
                argLen:  3,
@@ -63973,21 +63031,6 @@ var opcodeTable = [...]opInfo{
                argLen:  3,
                generic: true,
        },
-       {
-               name:    "AddDotProdQuadrupleSaturatedMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleSaturatedMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleSaturatedMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
        {
                name:        "AddFloat32x4",
                argLen:      2,
@@ -64096,186 +63139,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AddMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:    "AddPairsFloat32x4",
                argLen:  2,
@@ -64382,78 +63245,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AddSaturatedMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "AddSaturatedUint8x16",
                argLen:      2,
@@ -64654,78 +63445,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AndMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:    "AndNotInt8x16",
                argLen:  2,
@@ -64786,66 +63505,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "AndNotMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "AndNotUint8x16",
                argLen:  2,
@@ -64978,42 +63637,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AverageMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "AverageUint8x16",
                argLen:      2,
@@ -65081,308 +63704,158 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedFloat32x4",
-               argLen:  2,
+               name:    "Broadcast128Uint8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedFloat64x2",
-               argLen:  2,
+               name:    "Broadcast128Uint16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt8x16",
-               argLen:  2,
+               name:    "Broadcast128Uint32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt16x8",
-               argLen:  2,
+               name:    "Broadcast128Uint64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt32x4",
-               argLen:  2,
+               name:    "Broadcast256Float32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt64x2",
-               argLen:  2,
+               name:    "Broadcast256Float64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint8x16",
-               argLen:  2,
+               name:    "Broadcast256Int8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint16x8",
-               argLen:  2,
+               name:    "Broadcast256Int16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint32x4",
-               argLen:  2,
+               name:    "Broadcast256Int32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint64x2",
-               argLen:  2,
+               name:    "Broadcast256Int64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint8x16",
+               name:    "Broadcast256Uint8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint16x8",
+               name:    "Broadcast256Uint16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint32x4",
+               name:    "Broadcast256Uint32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint64x2",
+               name:    "Broadcast256Uint64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Float32x4",
+               name:    "Broadcast512Float32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Float64x2",
+               name:    "Broadcast512Float64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int8x16",
+               name:    "Broadcast512Int8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int16x8",
+               name:    "Broadcast512Int16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int32x4",
+               name:    "Broadcast512Int32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int64x2",
+               name:    "Broadcast512Int64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedFloat32x4",
-               argLen:  2,
+               name:    "Broadcast512Uint8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedFloat64x2",
-               argLen:  2,
+               name:    "Broadcast512Uint16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt8x16",
-               argLen:  2,
+               name:    "Broadcast512Uint32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt16x8",
-               argLen:  2,
+               name:    "Broadcast512Uint64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt32x4",
-               argLen:  2,
+               name:    "CeilFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt64x2",
-               argLen:  2,
+               name:    "CeilFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint8x16",
-               argLen:  2,
+               name:    "CeilFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint16x8",
-               argLen:  2,
+               name:    "CeilFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint32x4",
+               name:    "CompressFloat32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint64x2",
+               name:    "CompressFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "Broadcast256Uint8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast256Uint16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast256Uint32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast256Uint64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Float32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Float64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CompressFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CompressFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CompressFloat32x16",
-               argLen:  2,
+               name:    "CompressFloat32x16",
+               argLen:  2,
                generic: true,
        },
        {
@@ -65535,21 +64008,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "ConvertToInt32MaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToInt32MaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToInt32MaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "ConvertToUint32Float32x4",
                argLen:  1,
@@ -65565,21 +64023,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "ConvertToUint32MaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToUint32MaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToUint32MaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "CopySignInt8x16",
                argLen:  2,
@@ -65640,36 +64083,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "DivMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "DotProdPairsInt16x8",
                argLen:  2,
@@ -65685,36 +64098,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "DotProdPairsMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsSaturatedMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsSaturatedMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsSaturatedMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "DotProdPairsSaturatedUint8x16",
                argLen:  2,
@@ -65838,186 +64221,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "EqualMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "EqualUint8x16",
                argLen:      2,
@@ -66260,21 +64463,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "GaloisFieldMulMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldMulMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldMulMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GaloisFieldMulUint8x16",
                argLen:  2,
@@ -66540,156 +64728,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "GreaterEqualMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GreaterEqualUint8x64",
                argLen:  2,
@@ -66800,156 +64838,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "GreaterMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GreaterUint8x64",
                argLen:  2,
@@ -67006,42 +64894,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "IsNanMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:    "LessEqualFloat32x4",
                argLen:  2,
@@ -67092,156 +64944,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "LessEqualMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "LessEqualUint8x64",
                argLen:  2,
@@ -67312,156 +65014,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "LessMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "LessUint8x64",
                argLen:  2,
@@ -67591,4459 +65143,2257 @@ var opcodeTable = [...]opInfo{
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MaxMaskedFloat32x8",
-               argLen:      3,
+               name:        "MaxUint8x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat32x16",
-               argLen:      3,
+               name:        "MaxUint8x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat64x2",
-               argLen:      3,
+               name:        "MaxUint8x64",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat64x4",
-               argLen:      3,
+               name:        "MaxUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat64x8",
-               argLen:      3,
+               name:        "MaxUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt8x16",
-               argLen:      3,
+               name:        "MaxUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt8x32",
-               argLen:      3,
+               name:        "MaxUint32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt8x64",
-               argLen:      3,
+               name:        "MaxUint32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt16x8",
-               argLen:      3,
+               name:        "MaxUint32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt16x16",
-               argLen:      3,
+               name:        "MaxUint64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt16x32",
-               argLen:      3,
+               name:        "MaxUint64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt32x4",
-               argLen:      3,
+               name:        "MaxUint64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt32x8",
-               argLen:      3,
+               name:        "MinFloat32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt32x16",
-               argLen:      3,
+               name:        "MinFloat32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt64x2",
-               argLen:      3,
+               name:        "MinFloat32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt64x4",
-               argLen:      3,
+               name:        "MinFloat64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt64x8",
-               argLen:      3,
+               name:        "MinFloat64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint8x16",
-               argLen:      3,
+               name:        "MinFloat64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint8x32",
-               argLen:      3,
+               name:        "MinInt8x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint8x64",
-               argLen:      3,
+               name:        "MinInt8x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint16x8",
-               argLen:      3,
+               name:        "MinInt8x64",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint16x16",
-               argLen:      3,
+               name:        "MinInt16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint16x32",
-               argLen:      3,
+               name:        "MinInt16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint32x4",
-               argLen:      3,
+               name:        "MinInt16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint32x8",
-               argLen:      3,
+               name:        "MinInt32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint32x16",
-               argLen:      3,
+               name:        "MinInt32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint64x2",
-               argLen:      3,
+               name:        "MinInt32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint64x4",
-               argLen:      3,
+               name:        "MinInt64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint64x8",
-               argLen:      3,
+               name:        "MinInt64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint8x16",
+               name:        "MinInt64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint8x32",
+               name:        "MinUint8x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint8x64",
+               name:        "MinUint8x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint16x8",
+               name:        "MinUint8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint16x16",
+               name:        "MinUint16x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint16x32",
+               name:        "MinUint16x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint32x4",
+               name:        "MinUint16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint32x8",
+               name:        "MinUint32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint32x16",
+               name:        "MinUint32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint64x2",
+               name:        "MinUint32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint64x4",
+               name:        "MinUint64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint64x8",
+               name:        "MinUint64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinFloat32x4",
+               name:        "MinUint64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinFloat32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat32x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat32x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat32x16",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat64x2",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat64x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinInt8x16",
+               name:    "MulAddFloat64x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat32x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat32x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat32x16",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat64x2",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat64x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat64x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:        "MulEvenWidenInt32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt8x32",
+               name:        "MulEvenWidenInt32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt8x64",
+               name:        "MulEvenWidenUint32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt16x8",
+               name:        "MulEvenWidenUint32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt16x16",
+               name:        "MulFloat32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt16x32",
+               name:        "MulFloat32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt32x4",
+               name:        "MulFloat32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt32x8",
+               name:        "MulFloat64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt32x16",
+               name:        "MulFloat64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt64x2",
+               name:        "MulFloat64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt64x4",
+               name:        "MulHighInt16x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt64x8",
+               name:        "MulHighInt16x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat32x4",
-               argLen:      3,
+               name:        "MulHighInt16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat32x8",
-               argLen:      3,
+               name:        "MulHighUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat32x16",
-               argLen:      3,
+               name:        "MulHighUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat64x2",
-               argLen:      3,
+               name:        "MulHighUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat64x4",
-               argLen:      3,
+               name:        "MulInt16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat64x8",
-               argLen:      3,
+               name:        "MulInt16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt8x16",
-               argLen:      3,
+               name:        "MulInt16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt8x32",
-               argLen:      3,
+               name:        "MulInt32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt8x64",
-               argLen:      3,
+               name:        "MulInt32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt16x8",
-               argLen:      3,
+               name:        "MulInt32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt16x16",
-               argLen:      3,
+               name:        "MulInt64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt16x32",
-               argLen:      3,
+               name:        "MulInt64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt32x4",
-               argLen:      3,
+               name:        "MulInt64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat32x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat32x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat32x16",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat64x2",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat64x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat64x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedUint8x32",
-               argLen:      3,
+               name:        "MulUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint8x64",
-               argLen:      3,
+               name:        "MulUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint16x8",
-               argLen:      3,
+               name:        "MulUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint16x16",
-               argLen:      3,
+               name:        "MulUint32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint16x32",
-               argLen:      3,
+               name:        "MulUint32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint32x4",
-               argLen:      3,
+               name:        "MulUint32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint32x8",
-               argLen:      3,
+               name:        "MulUint64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint32x16",
-               argLen:      3,
+               name:        "MulUint64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint64x2",
-               argLen:      3,
+               name:        "MulUint64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint64x4",
-               argLen:      3,
+               name:        "NotEqualFloat32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint64x8",
-               argLen:      3,
+               name:        "NotEqualFloat32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint8x16",
+               name:        "NotEqualFloat32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint8x32",
+               name:        "NotEqualFloat64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint8x64",
+               name:        "NotEqualFloat64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint16x8",
+               name:        "NotEqualFloat64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint16x16",
+               name:        "NotEqualInt8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint16x32",
+               name:        "NotEqualInt16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint32x4",
+               name:        "NotEqualInt32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint32x8",
+               name:        "NotEqualInt64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint32x16",
+               name:        "NotEqualUint8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint64x2",
+               name:        "NotEqualUint16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint64x4",
+               name:        "NotEqualUint32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint64x8",
+               name:        "NotEqualUint64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:    "MulAddFloat32x4",
-               argLen:  3,
+               name:    "OnesCountInt8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat32x8",
-               argLen:  3,
+               name:    "OnesCountInt8x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat32x16",
-               argLen:  3,
+               name:    "OnesCountInt8x64",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat64x2",
-               argLen:  3,
+               name:    "OnesCountInt16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat64x4",
-               argLen:  3,
+               name:    "OnesCountInt16x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat64x8",
-               argLen:  3,
+               name:    "OnesCountInt16x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat32x4",
-               argLen:  4,
+               name:    "OnesCountInt32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat32x8",
-               argLen:  4,
+               name:    "OnesCountInt32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat32x16",
-               argLen:  4,
+               name:    "OnesCountInt32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat64x2",
-               argLen:  4,
+               name:    "OnesCountInt64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat64x4",
-               argLen:  4,
+               name:    "OnesCountInt64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat64x8",
-               argLen:  4,
+               name:    "OnesCountInt64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat32x4",
-               argLen:  3,
+               name:    "OnesCountUint8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat32x8",
-               argLen:  3,
+               name:    "OnesCountUint8x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat32x16",
-               argLen:  3,
+               name:    "OnesCountUint8x64",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat64x2",
-               argLen:  3,
+               name:    "OnesCountUint16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat64x4",
-               argLen:  3,
+               name:    "OnesCountUint16x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat64x8",
-               argLen:  3,
+               name:    "OnesCountUint16x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat32x4",
-               argLen:  4,
+               name:    "OnesCountUint32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat32x8",
-               argLen:  4,
+               name:    "OnesCountUint32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat32x16",
-               argLen:  4,
+               name:    "OnesCountUint32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat64x2",
-               argLen:  4,
+               name:    "OnesCountUint64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat64x4",
-               argLen:  4,
+               name:    "OnesCountUint64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat64x8",
-               argLen:  4,
+               name:    "OnesCountUint64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:        "MulEvenWidenInt32x4",
+               name:        "OrInt8x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulEvenWidenInt32x8",
+               name:        "OrInt8x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulEvenWidenUint32x4",
+               name:        "OrInt8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulEvenWidenUint32x8",
+               name:        "OrInt16x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat32x4",
+               name:        "OrInt16x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat32x8",
+               name:        "OrInt16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat32x16",
+               name:        "OrInt32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat64x2",
+               name:        "OrInt32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat64x4",
+               name:        "OrInt32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat64x8",
+               name:        "OrInt64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighInt16x8",
+               name:        "OrInt64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighInt16x16",
+               name:        "OrInt64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighInt16x32",
+               name:        "OrUint8x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulHighMaskedInt16x16",
-               argLen:      3,
+               name:        "OrUint8x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedInt16x32",
-               argLen:      3,
+               name:        "OrUint8x64",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedUint16x8",
-               argLen:      3,
+               name:        "OrUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedUint16x16",
-               argLen:      3,
+               name:        "OrUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedUint16x32",
-               argLen:      3,
+               name:        "OrUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighUint16x8",
+               name:        "OrUint32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighUint16x16",
+               name:        "OrUint32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighUint16x32",
+               name:        "OrUint32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt16x8",
+               name:        "OrUint64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt16x16",
+               name:        "OrUint64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt16x32",
+               name:        "OrUint64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float32x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float32x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float32x16",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float64x2",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulInt64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:    "MulSubAddFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:        "MulUint16x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint16x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:    "OnesCountInt8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt8x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt8x64",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt16x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt16x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint8x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint8x64",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint16x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint16x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:        "OrInt8x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt8x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt16x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt16x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint8x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint8x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint16x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint16x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:    "Permute2Float32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt8x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt8x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt8x64",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt16x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt16x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt16x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint8x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint8x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint8x64",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint16x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint16x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint16x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat64x2",
+               name:    "Permute2Float64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ScaleMaskedFloat64x4",
+               name:    "Permute2Float64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ScaleMaskedFloat64x8",
+               name:    "Permute2Int8x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "SetHiFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt32x8",
-               argLen:  2,
+               name:    "Permute2Int8x32",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt32x16",
-               argLen:  2,
+               name:    "Permute2Int8x64",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt64x2",
-               argLen:  2,
+               name:    "Permute2Int16x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt64x4",
-               argLen:  2,
+               name:    "Permute2Int16x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt64x8",
-               argLen:  2,
+               name:    "Permute2Int16x32",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt16x8",
+               name:    "Permute2Int32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt16x16",
+               name:    "Permute2Int32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt16x32",
+               name:    "Permute2Int32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt32x4",
+               name:    "Permute2Int64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt32x8",
+               name:    "Permute2Int64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt32x16",
+               name:    "Permute2Int64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt64x2",
+               name:    "Permute2Uint8x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt64x4",
+               name:    "Permute2Uint8x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt64x8",
+               name:    "Permute2Uint8x64",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint16x8",
+               name:    "Permute2Uint16x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint16x16",
+               name:    "Permute2Uint16x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint16x32",
+               name:    "Permute2Uint16x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint32x4",
+               name:    "Permute2Uint32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint32x8",
+               name:    "Permute2Uint32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint32x16",
+               name:    "Permute2Uint32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint64x2",
+               name:    "Permute2Uint64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint64x4",
+               name:    "Permute2Uint64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint64x8",
+               name:    "Permute2Uint64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint16x8",
+               name:    "PermuteFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint16x16",
+               name:    "PermuteFloat32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint16x32",
+               name:    "PermuteFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint32x4",
+               name:    "PermuteFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint32x8",
+               name:    "PermuteInt8x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint32x16",
+               name:    "PermuteInt8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint64x2",
+               name:    "PermuteInt8x64",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint64x4",
+               name:    "PermuteInt16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint64x8",
+               name:    "PermuteInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt16x8",
+               name:    "PermuteInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt16x16",
+               name:    "PermuteInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt16x32",
+               name:    "PermuteInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt32x4",
+               name:    "PermuteInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt32x8",
+               name:    "PermuteInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt32x16",
+               name:    "PermuteUint8x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt64x2",
+               name:    "PermuteUint8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt64x4",
+               name:    "PermuteUint8x64",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt64x8",
+               name:    "PermuteUint16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt16x8",
-               argLen:  3,
+               name:    "PermuteUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt16x16",
-               argLen:  3,
+               name:    "PermuteUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt16x32",
-               argLen:  3,
+               name:    "PermuteUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt32x4",
-               argLen:  3,
+               name:    "PermuteUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt32x8",
-               argLen:  3,
+               name:    "PermuteUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt32x16",
-               argLen:  3,
+               name:    "PermuteUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt64x2",
-               argLen:  3,
+               name:    "ReciprocalFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt64x4",
-               argLen:  3,
+               name:    "ReciprocalFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt64x8",
-               argLen:  3,
+               name:    "ReciprocalFloat32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint16x8",
-               argLen:  3,
+               name:    "ReciprocalFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint16x16",
-               argLen:  3,
+               name:    "ReciprocalFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint16x32",
-               argLen:  3,
+               name:    "ReciprocalFloat64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint32x4",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint32x8",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint32x16",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint64x2",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint64x4",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint64x8",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint16x8",
+               name:    "RotateLeftInt32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint16x16",
+               name:    "RotateLeftInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint16x32",
+               name:    "RotateLeftInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint32x4",
+               name:    "RotateLeftInt64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint32x8",
+               name:    "RotateLeftInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint32x16",
+               name:    "RotateLeftInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint64x2",
+               name:    "RotateLeftUint32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint64x4",
+               name:    "RotateLeftUint32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint64x8",
+               name:    "RotateLeftUint32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt16x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt16x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt16x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt64x4",
-               argLen:  4,
+               name:    "RotateLeftUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedInt64x8",
-               argLen:  4,
+               name:    "RotateLeftUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint16x8",
-               argLen:  4,
+               name:    "RotateLeftUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint16x16",
-               argLen:  4,
+               name:    "RotateRightInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint16x32",
-               argLen:  4,
+               name:    "RotateRightInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint32x4",
-               argLen:  4,
+               name:    "RotateRightInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint32x8",
-               argLen:  4,
+               name:    "RotateRightInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint32x16",
-               argLen:  4,
+               name:    "RotateRightInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint64x2",
-               argLen:  4,
+               name:    "RotateRightInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint64x4",
-               argLen:  4,
+               name:    "RotateRightUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint64x8",
-               argLen:  4,
+               name:    "RotateRightUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint16x8",
-               argLen:  3,
+               name:    "RotateRightUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint16x16",
-               argLen:  3,
+               name:    "RotateRightUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint16x32",
-               argLen:  3,
+               name:    "RotateRightUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint32x4",
-               argLen:  3,
+               name:    "RotateRightUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint32x8",
-               argLen:  3,
+               name:    "RoundToEvenFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint32x16",
-               argLen:  3,
+               name:    "RoundToEvenFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint64x2",
-               argLen:  3,
+               name:    "RoundToEvenFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint64x4",
-               argLen:  3,
+               name:    "RoundToEvenFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint64x8",
-               argLen:  3,
+               name:    "ScaleFloat32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt16x8",
+               name:    "ScaleFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt16x16",
+               name:    "ScaleFloat32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt16x32",
+               name:    "ScaleFloat64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt32x4",
+               name:    "ScaleFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt32x8",
+               name:    "ScaleFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt32x16",
+               name:    "SetHiFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt64x2",
+               name:    "SetHiFloat32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt64x4",
+               name:    "SetHiFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt64x8",
+               name:    "SetHiFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt16x8",
-               argLen:  3,
+               name:    "SetHiInt8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt16x16",
-               argLen:  3,
+               name:    "SetHiInt8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt16x32",
-               argLen:  3,
+               name:    "SetHiInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt32x4",
-               argLen:  3,
+               name:    "SetHiInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt32x8",
-               argLen:  3,
+               name:    "SetHiInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt32x16",
-               argLen:  3,
+               name:    "SetHiInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt64x2",
-               argLen:  3,
+               name:    "SetHiInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt64x4",
-               argLen:  3,
+               name:    "SetHiInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt64x8",
-               argLen:  3,
+               name:    "SetHiUint8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint16x8",
-               argLen:  3,
+               name:    "SetHiUint8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint16x16",
-               argLen:  3,
+               name:    "SetHiUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint16x32",
-               argLen:  3,
+               name:    "SetHiUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint32x4",
-               argLen:  3,
+               name:    "SetHiUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint32x8",
-               argLen:  3,
+               name:    "SetHiUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint32x16",
-               argLen:  3,
+               name:    "SetHiUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint64x2",
-               argLen:  3,
+               name:    "SetHiUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint64x4",
-               argLen:  3,
+               name:    "SetLoFloat32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint64x8",
-               argLen:  3,
+               name:    "SetLoFloat32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint16x8",
+               name:    "SetLoFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint16x16",
+               name:    "SetLoFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint16x32",
+               name:    "SetLoInt8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint32x4",
+               name:    "SetLoInt8x64",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint32x8",
+               name:    "SetLoInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint32x16",
+               name:    "SetLoInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint64x2",
+               name:    "SetLoInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint64x4",
+               name:    "SetLoInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint64x8",
+               name:    "SetLoInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt16x8",
-               argLen:  3,
+               name:    "SetLoInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt16x16",
-               argLen:  3,
+               name:    "SetLoUint8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt16x32",
-               argLen:  3,
+               name:    "SetLoUint8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt32x4",
-               argLen:  3,
+               name:    "SetLoUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt32x8",
-               argLen:  3,
+               name:    "SetLoUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt32x16",
-               argLen:  3,
+               name:    "SetLoUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt64x2",
-               argLen:  3,
+               name:    "SetLoUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt64x4",
-               argLen:  3,
+               name:    "SetLoUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt64x8",
-               argLen:  3,
+               name:    "SetLoUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt16x8",
-               argLen:  4,
+               name:    "ShiftAllLeftInt16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt16x16",
-               argLen:  4,
+               name:    "ShiftAllLeftInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt16x32",
-               argLen:  4,
+               name:    "ShiftAllLeftInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt32x4",
-               argLen:  4,
+               name:    "ShiftAllLeftInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt32x8",
-               argLen:  4,
+               name:    "ShiftAllLeftInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt32x16",
-               argLen:  4,
+               name:    "ShiftAllLeftInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt64x2",
-               argLen:  4,
+               name:    "ShiftAllLeftInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt64x4",
-               argLen:  4,
+               name:    "ShiftAllLeftInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt64x8",
-               argLen:  4,
+               name:    "ShiftAllLeftInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint16x8",
-               argLen:  4,
+               name:    "ShiftAllLeftUint16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint16x16",
-               argLen:  4,
+               name:    "ShiftAllLeftUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint16x32",
-               argLen:  4,
+               name:    "ShiftAllLeftUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint32x4",
-               argLen:  4,
+               name:    "ShiftAllLeftUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint32x8",
-               argLen:  4,
+               name:    "ShiftAllLeftUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint32x16",
-               argLen:  4,
+               name:    "ShiftAllLeftUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint64x2",
-               argLen:  4,
+               name:    "ShiftAllLeftUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint64x4",
-               argLen:  4,
+               name:    "ShiftAllLeftUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint64x8",
-               argLen:  4,
+               name:    "ShiftAllLeftUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint16x8",
-               argLen:  3,
+               name:    "ShiftAllRightInt16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint16x16",
-               argLen:  3,
+               name:    "ShiftAllRightInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint16x32",
-               argLen:  3,
+               name:    "ShiftAllRightInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint32x4",
-               argLen:  3,
+               name:    "ShiftAllRightInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint32x8",
-               argLen:  3,
+               name:    "ShiftAllRightInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint32x16",
-               argLen:  3,
+               name:    "ShiftAllRightInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint64x2",
-               argLen:  3,
+               name:    "ShiftAllRightInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint64x4",
-               argLen:  3,
+               name:    "ShiftAllRightInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint64x8",
-               argLen:  3,
+               name:    "ShiftAllRightInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt16x8",
+               name:    "ShiftAllRightUint16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt16x16",
+               name:    "ShiftAllRightUint16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt16x32",
+               name:    "ShiftAllRightUint16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt32x4",
+               name:    "ShiftAllRightUint32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt32x8",
+               name:    "ShiftAllRightUint32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt32x16",
+               name:    "ShiftAllRightUint32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt64x2",
+               name:    "ShiftAllRightUint64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt64x4",
+               name:    "ShiftAllRightUint64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt64x8",
+               name:    "ShiftAllRightUint64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt16x8",
+               name:    "ShiftLeftConcatInt16x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt16x16",
+               name:    "ShiftLeftConcatInt16x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt16x32",
+               name:    "ShiftLeftConcatInt16x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt32x4",
+               name:    "ShiftLeftConcatInt32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt32x8",
+               name:    "ShiftLeftConcatInt32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt32x16",
+               name:    "ShiftLeftConcatInt32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt64x2",
+               name:    "ShiftLeftConcatInt64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt64x4",
+               name:    "ShiftLeftConcatInt64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt64x8",
+               name:    "ShiftLeftConcatInt64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint16x8",
+               name:    "ShiftLeftConcatUint16x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint16x16",
+               name:    "ShiftLeftConcatUint16x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint16x32",
+               name:    "ShiftLeftConcatUint16x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint32x4",
+               name:    "ShiftLeftConcatUint32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint32x8",
+               name:    "ShiftLeftConcatUint32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint32x16",
+               name:    "ShiftLeftConcatUint32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint64x2",
+               name:    "ShiftLeftConcatUint64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint64x4",
+               name:    "ShiftLeftConcatUint64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint64x8",
+               name:    "ShiftLeftConcatUint64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightUint16x8",
+               name:    "ShiftLeftInt16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint16x16",
+               name:    "ShiftLeftInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint16x32",
+               name:    "ShiftLeftInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint32x4",
+               name:    "ShiftLeftInt32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint32x8",
+               name:    "ShiftLeftInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint32x16",
+               name:    "ShiftLeftInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint64x2",
+               name:    "ShiftLeftInt64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint64x4",
+               name:    "ShiftLeftInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint64x8",
+               name:    "ShiftLeftInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat32x4",
-               argLen:  1,
+               name:    "ShiftLeftUint16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat32x8",
-               argLen:  1,
+               name:    "ShiftLeftUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat32x16",
-               argLen:  1,
+               name:    "ShiftLeftUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat64x2",
-               argLen:  1,
+               name:    "ShiftLeftUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat64x4",
-               argLen:  1,
+               name:    "ShiftLeftUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat64x8",
-               argLen:  1,
+               name:    "ShiftLeftUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat32x4",
+               name:    "ShiftLeftUint64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat32x8",
+               name:    "ShiftLeftUint64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat32x16",
+               name:    "ShiftLeftUint64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat64x2",
-               argLen:  2,
+               name:    "ShiftRightConcatInt16x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat64x4",
-               argLen:  2,
+               name:    "ShiftRightConcatInt16x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat64x8",
-               argLen:  2,
+               name:    "ShiftRightConcatInt16x32",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat32x4",
-               argLen:  2,
+               name:    "ShiftRightConcatInt32x4",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat32x8",
-               argLen:  2,
+               name:    "ShiftRightConcatInt32x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat32x16",
-               argLen:  2,
+               name:    "ShiftRightConcatInt32x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat64x2",
-               argLen:  2,
+               name:    "ShiftRightConcatInt64x2",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat64x4",
-               argLen:  2,
+               name:    "ShiftRightConcatInt64x4",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat64x8",
-               argLen:  2,
+               name:    "ShiftRightConcatInt64x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubInt8x16",
+               name:    "ShiftRightConcatUint16x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint16x16",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint16x32",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint32x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint32x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint32x16",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint64x2",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint64x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint64x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightInt16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt8x32",
+               name:    "ShiftRightInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt8x64",
+               name:    "ShiftRightInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt16x8",
+               name:    "ShiftRightInt32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt16x16",
+               name:    "ShiftRightInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt16x32",
+               name:    "ShiftRightInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt32x4",
+               name:    "ShiftRightInt64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt32x8",
+               name:    "ShiftRightInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt32x16",
+               name:    "ShiftRightInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt64x2",
+               name:    "ShiftRightUint16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt64x4",
+               name:    "ShiftRightUint16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt64x8",
+               name:    "ShiftRightUint16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat32x4",
-               argLen:  3,
+               name:    "ShiftRightUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat32x8",
-               argLen:  3,
+               name:    "ShiftRightUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat32x16",
-               argLen:  3,
+               name:    "ShiftRightUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat64x2",
-               argLen:  3,
+               name:    "ShiftRightUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat64x4",
-               argLen:  3,
+               name:    "ShiftRightUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat64x8",
-               argLen:  3,
+               name:    "ShiftRightUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt8x16",
-               argLen:  3,
+               name:    "SqrtFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt8x32",
-               argLen:  3,
+               name:    "SqrtFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt8x64",
-               argLen:  3,
+               name:    "SqrtFloat32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt16x8",
-               argLen:  3,
+               name:    "SqrtFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt16x16",
-               argLen:  3,
+               name:    "SqrtFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt16x32",
-               argLen:  3,
+               name:    "SqrtFloat64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt32x4",
-               argLen:  3,
+               name:    "SubFloat32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt32x8",
-               argLen:  3,
+               name:    "SubFloat32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt32x16",
-               argLen:  3,
+               name:    "SubFloat32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt64x2",
-               argLen:  3,
+               name:    "SubFloat64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt64x4",
-               argLen:  3,
+               name:    "SubFloat64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt64x8",
-               argLen:  3,
+               name:    "SubFloat64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint8x16",
-               argLen:  3,
+               name:    "SubInt8x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint8x32",
-               argLen:  3,
+               name:    "SubInt8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint8x64",
-               argLen:  3,
+               name:    "SubInt8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint16x8",
-               argLen:  3,
+               name:    "SubInt16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint16x16",
-               argLen:  3,
+               name:    "SubInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint16x32",
-               argLen:  3,
+               name:    "SubInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint32x4",
-               argLen:  3,
+               name:    "SubInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint32x8",
-               argLen:  3,
+               name:    "SubInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint32x16",
-               argLen:  3,
+               name:    "SubInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint64x2",
-               argLen:  3,
+               name:    "SubInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint64x4",
-               argLen:  3,
+               name:    "SubInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint64x8",
-               argLen:  3,
+               name:    "SubInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
@@ -72146,66 +67496,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "SubSaturatedMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "SubSaturatedUint8x16",
                argLen:  2,
@@ -72388,78 +67678,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "XorMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "XorUint8x16",
                argLen:      2,
@@ -72553,319 +67771,157 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "blendMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "blendMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat32x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat32x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat32x16",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat64x2",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat64x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat64x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat32x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat32x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat32x16",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat64x2",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat64x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat64x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "FloorScaledFloat32x4",
-               auxType: auxUInt8,
-               argLen:  1,
+               name:    "blendMaskedInt32x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat32x8",
-               auxType: auxUInt8,
-               argLen:  1,
+               name:    "blendMaskedInt64x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat32x16",
+               name:    "CeilScaledFloat32x4",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat64x2",
+               name:    "CeilScaledFloat32x8",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat64x4",
+               name:    "CeilScaledFloat32x16",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat64x8",
+               name:    "CeilScaledFloat64x2",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "FloorScaledMaskedFloat32x8",
+               name:    "CeilScaledFloat64x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat32x16",
+               name:    "CeilScaledFloat64x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat64x2",
+               name:    "CeilScaledResidueFloat32x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat64x4",
+               name:    "CeilScaledResidueFloat32x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat64x8",
+               name:    "CeilScaledResidueFloat32x16",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat32x4",
+               name:    "CeilScaledResidueFloat64x2",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat32x8",
+               name:    "CeilScaledResidueFloat64x4",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat32x16",
+               name:    "CeilScaledResidueFloat64x8",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat64x2",
+               name:    "FloorScaledFloat32x4",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat64x4",
+               name:    "FloorScaledFloat32x8",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat64x8",
+               name:    "FloorScaledFloat32x16",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat32x4",
+               name:    "FloorScaledFloat64x2",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat32x8",
+               name:    "FloorScaledFloat64x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat32x16",
+               name:    "FloorScaledFloat64x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat64x2",
+               name:    "FloorScaledResidueFloat32x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat64x4",
+               name:    "FloorScaledResidueFloat32x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat64x8",
+               name:    "FloorScaledResidueFloat32x16",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "GaloisFieldAffineTransformInverseMaskedUint8x16",
+               name:    "FloorScaledResidueFloat64x2",
                auxType: auxUInt8,
-               argLen:  3,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "GaloisFieldAffineTransformInverseMaskedUint8x32",
+               name:    "FloorScaledResidueFloat64x4",
                auxType: auxUInt8,
-               argLen:  3,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "GaloisFieldAffineTransformInverseMaskedUint8x64",
+               name:    "FloorScaledResidueFloat64x8",
                auxType: auxUInt8,
-               argLen:  3,
+               argLen:  1,
                generic: true,
        },
        {
@@ -72886,24 +67942,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "GaloisFieldAffineTransformMaskedUint8x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldAffineTransformMaskedUint8x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldAffineTransformMaskedUint8x64",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GaloisFieldAffineTransformUint8x16",
                auxType: auxUInt8,
@@ -73018,78 +68056,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RotateAllLeftMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "RotateAllLeftUint32x4",
                auxType: auxUInt8,
@@ -73162,78 +68128,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RotateAllRightMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "RotateAllRightUint32x4",
                auxType: auxUInt8,
@@ -73306,42 +68200,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RoundToEvenScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "RoundToEvenScaledResidueFloat32x4",
                auxType: auxUInt8,
@@ -73378,42 +68236,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "SetElemFloat32x4",
                auxType: auxUInt8,
@@ -73528,114 +68350,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "ShiftAllLeftConcatUint16x8",
                auxType: auxUInt8,
@@ -73744,114 +68458,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "ShiftAllRightConcatMaskedInt16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "ShiftAllRightConcatUint16x8",
                auxType: auxUInt8,
@@ -73942,42 +68548,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "TruncScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "TruncScaledResidueFloat32x4",
                auxType: auxUInt8,
@@ -74014,42 +68584,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "TruncScaledResidueMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
 }
 
 func (o Op) Asm() obj.As          { return opcodeTable[o].asm }
index 69393014c78a85f21e282c812beccd5c36906996..87b1e0586d711be4708de05ef45ee487dafee822 100644 (file)
@@ -537,72 +537,36 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VPSLLD256(v)
        case OpAMD64VPSLLD512:
                return rewriteValueAMD64_OpAMD64VPSLLD512(v)
-       case OpAMD64VPSLLDMasked128:
-               return rewriteValueAMD64_OpAMD64VPSLLDMasked128(v)
-       case OpAMD64VPSLLDMasked256:
-               return rewriteValueAMD64_OpAMD64VPSLLDMasked256(v)
-       case OpAMD64VPSLLDMasked512:
-               return rewriteValueAMD64_OpAMD64VPSLLDMasked512(v)
        case OpAMD64VPSLLQ128:
                return rewriteValueAMD64_OpAMD64VPSLLQ128(v)
        case OpAMD64VPSLLQ256:
                return rewriteValueAMD64_OpAMD64VPSLLQ256(v)
        case OpAMD64VPSLLQ512:
                return rewriteValueAMD64_OpAMD64VPSLLQ512(v)
-       case OpAMD64VPSLLQMasked128:
-               return rewriteValueAMD64_OpAMD64VPSLLQMasked128(v)
-       case OpAMD64VPSLLQMasked256:
-               return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v)
-       case OpAMD64VPSLLQMasked512:
-               return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v)
        case OpAMD64VPSLLW128:
                return rewriteValueAMD64_OpAMD64VPSLLW128(v)
        case OpAMD64VPSLLW256:
                return rewriteValueAMD64_OpAMD64VPSLLW256(v)
        case OpAMD64VPSLLW512:
                return rewriteValueAMD64_OpAMD64VPSLLW512(v)
-       case OpAMD64VPSLLWMasked128:
-               return rewriteValueAMD64_OpAMD64VPSLLWMasked128(v)
-       case OpAMD64VPSLLWMasked256:
-               return rewriteValueAMD64_OpAMD64VPSLLWMasked256(v)
-       case OpAMD64VPSLLWMasked512:
-               return rewriteValueAMD64_OpAMD64VPSLLWMasked512(v)
        case OpAMD64VPSRAD128:
                return rewriteValueAMD64_OpAMD64VPSRAD128(v)
        case OpAMD64VPSRAD256:
                return rewriteValueAMD64_OpAMD64VPSRAD256(v)
        case OpAMD64VPSRAD512:
                return rewriteValueAMD64_OpAMD64VPSRAD512(v)
-       case OpAMD64VPSRADMasked128:
-               return rewriteValueAMD64_OpAMD64VPSRADMasked128(v)
-       case OpAMD64VPSRADMasked256:
-               return rewriteValueAMD64_OpAMD64VPSRADMasked256(v)
-       case OpAMD64VPSRADMasked512:
-               return rewriteValueAMD64_OpAMD64VPSRADMasked512(v)
        case OpAMD64VPSRAQ128:
                return rewriteValueAMD64_OpAMD64VPSRAQ128(v)
        case OpAMD64VPSRAQ256:
                return rewriteValueAMD64_OpAMD64VPSRAQ256(v)
        case OpAMD64VPSRAQ512:
                return rewriteValueAMD64_OpAMD64VPSRAQ512(v)
-       case OpAMD64VPSRAQMasked128:
-               return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v)
-       case OpAMD64VPSRAQMasked256:
-               return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v)
-       case OpAMD64VPSRAQMasked512:
-               return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v)
        case OpAMD64VPSRAW128:
                return rewriteValueAMD64_OpAMD64VPSRAW128(v)
        case OpAMD64VPSRAW256:
                return rewriteValueAMD64_OpAMD64VPSRAW256(v)
        case OpAMD64VPSRAW512:
                return rewriteValueAMD64_OpAMD64VPSRAW512(v)
-       case OpAMD64VPSRAWMasked128:
-               return rewriteValueAMD64_OpAMD64VPSRAWMasked128(v)
-       case OpAMD64VPSRAWMasked256:
-               return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v)
-       case OpAMD64VPSRAWMasked512:
-               return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v)
        case OpAMD64XADDLlock:
                return rewriteValueAMD64_OpAMD64XADDLlock(v)
        case OpAMD64XADDQlock:
@@ -667,30 +631,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAbsInt8x64:
                v.Op = OpAMD64VPABSB512
                return true
-       case OpAbsMaskedInt16x16:
-               return rewriteValueAMD64_OpAbsMaskedInt16x16(v)
-       case OpAbsMaskedInt16x32:
-               return rewriteValueAMD64_OpAbsMaskedInt16x32(v)
-       case OpAbsMaskedInt16x8:
-               return rewriteValueAMD64_OpAbsMaskedInt16x8(v)
-       case OpAbsMaskedInt32x16:
-               return rewriteValueAMD64_OpAbsMaskedInt32x16(v)
-       case OpAbsMaskedInt32x4:
-               return rewriteValueAMD64_OpAbsMaskedInt32x4(v)
-       case OpAbsMaskedInt32x8:
-               return rewriteValueAMD64_OpAbsMaskedInt32x8(v)
-       case OpAbsMaskedInt64x2:
-               return rewriteValueAMD64_OpAbsMaskedInt64x2(v)
-       case OpAbsMaskedInt64x4:
-               return rewriteValueAMD64_OpAbsMaskedInt64x4(v)
-       case OpAbsMaskedInt64x8:
-               return rewriteValueAMD64_OpAbsMaskedInt64x8(v)
-       case OpAbsMaskedInt8x16:
-               return rewriteValueAMD64_OpAbsMaskedInt8x16(v)
-       case OpAbsMaskedInt8x32:
-               return rewriteValueAMD64_OpAbsMaskedInt8x32(v)
-       case OpAbsMaskedInt8x64:
-               return rewriteValueAMD64_OpAbsMaskedInt8x64(v)
        case OpAdd16:
                v.Op = OpAMD64ADDL
                return true
@@ -718,12 +658,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddDotProdPairsSaturatedInt32x8:
                v.Op = OpAMD64VPDPWSSDS256
                return true
-       case OpAddDotProdPairsSaturatedMaskedInt32x16:
-               return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v)
-       case OpAddDotProdPairsSaturatedMaskedInt32x4:
-               return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v)
-       case OpAddDotProdPairsSaturatedMaskedInt32x8:
-               return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v)
        case OpAddDotProdQuadrupleInt32x16:
                v.Op = OpAMD64VPDPBUSD512
                return true
@@ -733,12 +667,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddDotProdQuadrupleInt32x8:
                v.Op = OpAMD64VPDPBUSD256
                return true
-       case OpAddDotProdQuadrupleMaskedInt32x16:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v)
-       case OpAddDotProdQuadrupleMaskedInt32x4:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v)
-       case OpAddDotProdQuadrupleMaskedInt32x8:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v)
        case OpAddDotProdQuadrupleSaturatedInt32x16:
                v.Op = OpAMD64VPDPBUSDS512
                return true
@@ -748,12 +676,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddDotProdQuadrupleSaturatedInt32x8:
                v.Op = OpAMD64VPDPBUSDS256
                return true
-       case OpAddDotProdQuadrupleSaturatedMaskedInt32x16:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v)
-       case OpAddDotProdQuadrupleSaturatedMaskedInt32x4:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v)
-       case OpAddDotProdQuadrupleSaturatedMaskedInt32x8:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v)
        case OpAddFloat32x16:
                v.Op = OpAMD64VADDPS512
                return true
@@ -808,66 +730,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddInt8x64:
                v.Op = OpAMD64VPADDB512
                return true
-       case OpAddMaskedFloat32x16:
-               return rewriteValueAMD64_OpAddMaskedFloat32x16(v)
-       case OpAddMaskedFloat32x4:
-               return rewriteValueAMD64_OpAddMaskedFloat32x4(v)
-       case OpAddMaskedFloat32x8:
-               return rewriteValueAMD64_OpAddMaskedFloat32x8(v)
-       case OpAddMaskedFloat64x2:
-               return rewriteValueAMD64_OpAddMaskedFloat64x2(v)
-       case OpAddMaskedFloat64x4:
-               return rewriteValueAMD64_OpAddMaskedFloat64x4(v)
-       case OpAddMaskedFloat64x8:
-               return rewriteValueAMD64_OpAddMaskedFloat64x8(v)
-       case OpAddMaskedInt16x16:
-               return rewriteValueAMD64_OpAddMaskedInt16x16(v)
-       case OpAddMaskedInt16x32:
-               return rewriteValueAMD64_OpAddMaskedInt16x32(v)
-       case OpAddMaskedInt16x8:
-               return rewriteValueAMD64_OpAddMaskedInt16x8(v)
-       case OpAddMaskedInt32x16:
-               return rewriteValueAMD64_OpAddMaskedInt32x16(v)
-       case OpAddMaskedInt32x4:
-               return rewriteValueAMD64_OpAddMaskedInt32x4(v)
-       case OpAddMaskedInt32x8:
-               return rewriteValueAMD64_OpAddMaskedInt32x8(v)
-       case OpAddMaskedInt64x2:
-               return rewriteValueAMD64_OpAddMaskedInt64x2(v)
-       case OpAddMaskedInt64x4:
-               return rewriteValueAMD64_OpAddMaskedInt64x4(v)
-       case OpAddMaskedInt64x8:
-               return rewriteValueAMD64_OpAddMaskedInt64x8(v)
-       case OpAddMaskedInt8x16:
-               return rewriteValueAMD64_OpAddMaskedInt8x16(v)
-       case OpAddMaskedInt8x32:
-               return rewriteValueAMD64_OpAddMaskedInt8x32(v)
-       case OpAddMaskedInt8x64:
-               return rewriteValueAMD64_OpAddMaskedInt8x64(v)
-       case OpAddMaskedUint16x16:
-               return rewriteValueAMD64_OpAddMaskedUint16x16(v)
-       case OpAddMaskedUint16x32:
-               return rewriteValueAMD64_OpAddMaskedUint16x32(v)
-       case OpAddMaskedUint16x8:
-               return rewriteValueAMD64_OpAddMaskedUint16x8(v)
-       case OpAddMaskedUint32x16:
-               return rewriteValueAMD64_OpAddMaskedUint32x16(v)
-       case OpAddMaskedUint32x4:
-               return rewriteValueAMD64_OpAddMaskedUint32x4(v)
-       case OpAddMaskedUint32x8:
-               return rewriteValueAMD64_OpAddMaskedUint32x8(v)
-       case OpAddMaskedUint64x2:
-               return rewriteValueAMD64_OpAddMaskedUint64x2(v)
-       case OpAddMaskedUint64x4:
-               return rewriteValueAMD64_OpAddMaskedUint64x4(v)
-       case OpAddMaskedUint64x8:
-               return rewriteValueAMD64_OpAddMaskedUint64x8(v)
-       case OpAddMaskedUint8x16:
-               return rewriteValueAMD64_OpAddMaskedUint8x16(v)
-       case OpAddMaskedUint8x32:
-               return rewriteValueAMD64_OpAddMaskedUint8x32(v)
-       case OpAddMaskedUint8x64:
-               return rewriteValueAMD64_OpAddMaskedUint8x64(v)
        case OpAddPairsFloat32x4:
                v.Op = OpAMD64VHADDPS128
                return true
@@ -931,30 +793,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddSaturatedInt8x64:
                v.Op = OpAMD64VPADDSB512
                return true
-       case OpAddSaturatedMaskedInt16x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v)
-       case OpAddSaturatedMaskedInt16x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v)
-       case OpAddSaturatedMaskedInt16x8:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v)
-       case OpAddSaturatedMaskedInt8x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v)
-       case OpAddSaturatedMaskedInt8x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v)
-       case OpAddSaturatedMaskedInt8x64:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v)
-       case OpAddSaturatedMaskedUint16x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v)
-       case OpAddSaturatedMaskedUint16x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v)
-       case OpAddSaturatedMaskedUint16x8:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v)
-       case OpAddSaturatedMaskedUint8x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v)
-       case OpAddSaturatedMaskedUint8x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v)
-       case OpAddSaturatedMaskedUint8x64:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v)
        case OpAddSaturatedUint16x16:
                v.Op = OpAMD64VPADDUSW256
                return true
@@ -1074,30 +912,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAndInt8x64:
                v.Op = OpAMD64VPANDD512
                return true
-       case OpAndMaskedInt32x16:
-               return rewriteValueAMD64_OpAndMaskedInt32x16(v)
-       case OpAndMaskedInt32x4:
-               return rewriteValueAMD64_OpAndMaskedInt32x4(v)
-       case OpAndMaskedInt32x8:
-               return rewriteValueAMD64_OpAndMaskedInt32x8(v)
-       case OpAndMaskedInt64x2:
-               return rewriteValueAMD64_OpAndMaskedInt64x2(v)
-       case OpAndMaskedInt64x4:
-               return rewriteValueAMD64_OpAndMaskedInt64x4(v)
-       case OpAndMaskedInt64x8:
-               return rewriteValueAMD64_OpAndMaskedInt64x8(v)
-       case OpAndMaskedUint32x16:
-               return rewriteValueAMD64_OpAndMaskedUint32x16(v)
-       case OpAndMaskedUint32x4:
-               return rewriteValueAMD64_OpAndMaskedUint32x4(v)
-       case OpAndMaskedUint32x8:
-               return rewriteValueAMD64_OpAndMaskedUint32x8(v)
-       case OpAndMaskedUint64x2:
-               return rewriteValueAMD64_OpAndMaskedUint64x2(v)
-       case OpAndMaskedUint64x4:
-               return rewriteValueAMD64_OpAndMaskedUint64x4(v)
-       case OpAndMaskedUint64x8:
-               return rewriteValueAMD64_OpAndMaskedUint64x8(v)
        case OpAndNotInt16x16:
                v.Op = OpAMD64VPANDN256
                return true
@@ -1134,30 +948,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAndNotInt8x64:
                v.Op = OpAMD64VPANDND512
                return true
-       case OpAndNotMaskedInt32x16:
-               return rewriteValueAMD64_OpAndNotMaskedInt32x16(v)
-       case OpAndNotMaskedInt32x4:
-               return rewriteValueAMD64_OpAndNotMaskedInt32x4(v)
-       case OpAndNotMaskedInt32x8:
-               return rewriteValueAMD64_OpAndNotMaskedInt32x8(v)
-       case OpAndNotMaskedInt64x2:
-               return rewriteValueAMD64_OpAndNotMaskedInt64x2(v)
-       case OpAndNotMaskedInt64x4:
-               return rewriteValueAMD64_OpAndNotMaskedInt64x4(v)
-       case OpAndNotMaskedInt64x8:
-               return rewriteValueAMD64_OpAndNotMaskedInt64x8(v)
-       case OpAndNotMaskedUint32x16:
-               return rewriteValueAMD64_OpAndNotMaskedUint32x16(v)
-       case OpAndNotMaskedUint32x4:
-               return rewriteValueAMD64_OpAndNotMaskedUint32x4(v)
-       case OpAndNotMaskedUint32x8:
-               return rewriteValueAMD64_OpAndNotMaskedUint32x8(v)
-       case OpAndNotMaskedUint64x2:
-               return rewriteValueAMD64_OpAndNotMaskedUint64x2(v)
-       case OpAndNotMaskedUint64x4:
-               return rewriteValueAMD64_OpAndNotMaskedUint64x4(v)
-       case OpAndNotMaskedUint64x8:
-               return rewriteValueAMD64_OpAndNotMaskedUint64x8(v)
        case OpAndNotUint16x16:
                v.Op = OpAMD64VPANDN256
                return true
@@ -1276,18 +1066,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAtomicStore8(v)
        case OpAtomicStorePtrNoWB:
                return rewriteValueAMD64_OpAtomicStorePtrNoWB(v)
-       case OpAverageMaskedUint16x16:
-               return rewriteValueAMD64_OpAverageMaskedUint16x16(v)
-       case OpAverageMaskedUint16x32:
-               return rewriteValueAMD64_OpAverageMaskedUint16x32(v)
-       case OpAverageMaskedUint16x8:
-               return rewriteValueAMD64_OpAverageMaskedUint16x8(v)
-       case OpAverageMaskedUint8x16:
-               return rewriteValueAMD64_OpAverageMaskedUint8x16(v)
-       case OpAverageMaskedUint8x32:
-               return rewriteValueAMD64_OpAverageMaskedUint8x32(v)
-       case OpAverageMaskedUint8x64:
-               return rewriteValueAMD64_OpAverageMaskedUint8x64(v)
        case OpAverageUint16x16:
                v.Op = OpAMD64VPAVGW256
                return true
@@ -1335,26 +1113,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpBroadcast128Int8x16:
                v.Op = OpAMD64VPBROADCASTB128
                return true
-       case OpBroadcast128MaskedFloat32x4:
-               return rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v)
-       case OpBroadcast128MaskedFloat64x2:
-               return rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v)
-       case OpBroadcast128MaskedInt16x8:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v)
-       case OpBroadcast128MaskedInt32x4:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v)
-       case OpBroadcast128MaskedInt64x2:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v)
-       case OpBroadcast128MaskedInt8x16:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v)
-       case OpBroadcast128MaskedUint16x8:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v)
-       case OpBroadcast128MaskedUint32x4:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v)
-       case OpBroadcast128MaskedUint64x2:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v)
-       case OpBroadcast128MaskedUint8x16:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v)
        case OpBroadcast128Uint16x8:
                v.Op = OpAMD64VPBROADCASTW128
                return true
@@ -1385,26 +1143,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpBroadcast256Int8x16:
                v.Op = OpAMD64VPBROADCASTB256
                return true
-       case OpBroadcast256MaskedFloat32x4:
-               return rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v)
-       case OpBroadcast256MaskedFloat64x2:
-               return rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v)
-       case OpBroadcast256MaskedInt16x8:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v)
-       case OpBroadcast256MaskedInt32x4:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v)
-       case OpBroadcast256MaskedInt64x2:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v)
-       case OpBroadcast256MaskedInt8x16:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v)
-       case OpBroadcast256MaskedUint16x8:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v)
-       case OpBroadcast256MaskedUint32x4:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v)
-       case OpBroadcast256MaskedUint64x2:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v)
-       case OpBroadcast256MaskedUint8x16:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v)
        case OpBroadcast256Uint16x8:
                v.Op = OpAMD64VPBROADCASTW256
                return true
@@ -1435,26 +1173,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpBroadcast512Int8x16:
                v.Op = OpAMD64VPBROADCASTB512
                return true
-       case OpBroadcast512MaskedFloat32x4:
-               return rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v)
-       case OpBroadcast512MaskedFloat64x2:
-               return rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v)
-       case OpBroadcast512MaskedInt16x8:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v)
-       case OpBroadcast512MaskedInt32x4:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v)
-       case OpBroadcast512MaskedInt64x2:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v)
-       case OpBroadcast512MaskedInt8x16:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v)
-       case OpBroadcast512MaskedUint16x8:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v)
-       case OpBroadcast512MaskedUint32x4:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v)
-       case OpBroadcast512MaskedUint64x2:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v)
-       case OpBroadcast512MaskedUint8x16:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v)
        case OpBroadcast512Uint16x8:
                v.Op = OpAMD64VPBROADCASTW512
                return true
@@ -1497,18 +1215,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpCeilScaledFloat64x4(v)
        case OpCeilScaledFloat64x8:
                return rewriteValueAMD64_OpCeilScaledFloat64x8(v)
-       case OpCeilScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v)
-       case OpCeilScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v)
-       case OpCeilScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v)
-       case OpCeilScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v)
-       case OpCeilScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v)
-       case OpCeilScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v)
        case OpCeilScaledResidueFloat32x16:
                return rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v)
        case OpCeilScaledResidueFloat32x4:
@@ -1521,18 +1227,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v)
        case OpCeilScaledResidueFloat64x8:
                return rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v)
-       case OpCeilScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v)
-       case OpCeilScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v)
-       case OpCeilScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v)
-       case OpCeilScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v)
-       case OpCeilScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v)
-       case OpCeilScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v)
        case OpClosureCall:
                v.Op = OpAMD64CALLclosure
                return true
@@ -1639,12 +1333,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpConvertToInt32Float32x8:
                v.Op = OpAMD64VCVTTPS2DQ256
                return true
-       case OpConvertToInt32MaskedFloat32x16:
-               return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v)
-       case OpConvertToInt32MaskedFloat32x4:
-               return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v)
-       case OpConvertToInt32MaskedFloat32x8:
-               return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v)
        case OpConvertToUint32Float32x16:
                v.Op = OpAMD64VCVTPS2UDQ512
                return true
@@ -1654,12 +1342,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpConvertToUint32Float32x8:
                v.Op = OpAMD64VCVTPS2UDQ256
                return true
-       case OpConvertToUint32MaskedFloat32x16:
-               return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v)
-       case OpConvertToUint32MaskedFloat32x4:
-               return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v)
-       case OpConvertToUint32MaskedFloat32x8:
-               return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v)
        case OpCopySignInt16x16:
                v.Op = OpAMD64VPSIGNW256
                return true
@@ -1818,18 +1500,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpDivFloat64x8:
                v.Op = OpAMD64VDIVPD512
                return true
-       case OpDivMaskedFloat32x16:
-               return rewriteValueAMD64_OpDivMaskedFloat32x16(v)
-       case OpDivMaskedFloat32x4:
-               return rewriteValueAMD64_OpDivMaskedFloat32x4(v)
-       case OpDivMaskedFloat32x8:
-               return rewriteValueAMD64_OpDivMaskedFloat32x8(v)
-       case OpDivMaskedFloat64x2:
-               return rewriteValueAMD64_OpDivMaskedFloat64x2(v)
-       case OpDivMaskedFloat64x4:
-               return rewriteValueAMD64_OpDivMaskedFloat64x4(v)
-       case OpDivMaskedFloat64x8:
-               return rewriteValueAMD64_OpDivMaskedFloat64x8(v)
        case OpDotProdPairsInt16x16:
                v.Op = OpAMD64VPMADDWD256
                return true
@@ -1839,18 +1509,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpDotProdPairsInt16x8:
                v.Op = OpAMD64VPMADDWD128
                return true
-       case OpDotProdPairsMaskedInt16x16:
-               return rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v)
-       case OpDotProdPairsMaskedInt16x32:
-               return rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v)
-       case OpDotProdPairsMaskedInt16x8:
-               return rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v)
-       case OpDotProdPairsSaturatedMaskedUint8x16:
-               return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v)
-       case OpDotProdPairsSaturatedMaskedUint8x32:
-               return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v)
-       case OpDotProdPairsSaturatedMaskedUint8x64:
-               return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v)
        case OpDotProdPairsSaturatedUint8x16:
                v.Op = OpAMD64VPMADDUBSW128
                return true
@@ -1920,66 +1578,6 @@ func rewriteValueAMD64(v *Value) bool {
                return true
        case OpEqualInt8x64:
                return rewriteValueAMD64_OpEqualInt8x64(v)
-       case OpEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpEqualMaskedFloat32x16(v)
-       case OpEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpEqualMaskedFloat32x4(v)
-       case OpEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpEqualMaskedFloat32x8(v)
-       case OpEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpEqualMaskedFloat64x2(v)
-       case OpEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpEqualMaskedFloat64x4(v)
-       case OpEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpEqualMaskedFloat64x8(v)
-       case OpEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpEqualMaskedInt16x16(v)
-       case OpEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpEqualMaskedInt16x32(v)
-       case OpEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpEqualMaskedInt16x8(v)
-       case OpEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpEqualMaskedInt32x16(v)
-       case OpEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpEqualMaskedInt32x4(v)
-       case OpEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpEqualMaskedInt32x8(v)
-       case OpEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpEqualMaskedInt64x2(v)
-       case OpEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpEqualMaskedInt64x4(v)
-       case OpEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpEqualMaskedInt64x8(v)
-       case OpEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpEqualMaskedInt8x16(v)
-       case OpEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpEqualMaskedInt8x32(v)
-       case OpEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpEqualMaskedInt8x64(v)
-       case OpEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpEqualMaskedUint16x16(v)
-       case OpEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpEqualMaskedUint16x32(v)
-       case OpEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpEqualMaskedUint16x8(v)
-       case OpEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpEqualMaskedUint32x16(v)
-       case OpEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpEqualMaskedUint32x4(v)
-       case OpEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpEqualMaskedUint32x8(v)
-       case OpEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpEqualMaskedUint64x2(v)
-       case OpEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpEqualMaskedUint64x4(v)
-       case OpEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpEqualMaskedUint64x8(v)
-       case OpEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpEqualMaskedUint8x16(v)
-       case OpEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpEqualMaskedUint8x32(v)
-       case OpEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpEqualMaskedUint8x64(v)
        case OpEqualUint16x16:
                v.Op = OpAMD64VPCMPEQW256
                return true
@@ -2096,18 +1694,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpFloorScaledFloat64x4(v)
        case OpFloorScaledFloat64x8:
                return rewriteValueAMD64_OpFloorScaledFloat64x8(v)
-       case OpFloorScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v)
-       case OpFloorScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v)
-       case OpFloorScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v)
-       case OpFloorScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v)
-       case OpFloorScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v)
-       case OpFloorScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v)
        case OpFloorScaledResidueFloat32x16:
                return rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v)
        case OpFloorScaledResidueFloat32x4:
@@ -2120,24 +1706,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v)
        case OpFloorScaledResidueFloat64x8:
                return rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v)
-       case OpFloorScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v)
-       case OpFloorScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v)
-       case OpFloorScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v)
-       case OpFloorScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v)
-       case OpFloorScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v)
-       case OpFloorScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v)
-       case OpGaloisFieldAffineTransformInverseMaskedUint8x16:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v)
-       case OpGaloisFieldAffineTransformInverseMaskedUint8x32:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v)
-       case OpGaloisFieldAffineTransformInverseMaskedUint8x64:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v)
        case OpGaloisFieldAffineTransformInverseUint8x16:
                v.Op = OpAMD64VGF2P8AFFINEINVQB128
                return true
@@ -2147,12 +1715,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpGaloisFieldAffineTransformInverseUint8x64:
                v.Op = OpAMD64VGF2P8AFFINEINVQB512
                return true
-       case OpGaloisFieldAffineTransformMaskedUint8x16:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v)
-       case OpGaloisFieldAffineTransformMaskedUint8x32:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v)
-       case OpGaloisFieldAffineTransformMaskedUint8x64:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v)
        case OpGaloisFieldAffineTransformUint8x16:
                v.Op = OpAMD64VGF2P8AFFINEQB128
                return true
@@ -2162,12 +1724,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpGaloisFieldAffineTransformUint8x64:
                v.Op = OpAMD64VGF2P8AFFINEQB512
                return true
-       case OpGaloisFieldMulMaskedUint8x16:
-               return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v)
-       case OpGaloisFieldMulMaskedUint8x32:
-               return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v)
-       case OpGaloisFieldMulMaskedUint8x64:
-               return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v)
        case OpGaloisFieldMulUint8x16:
                v.Op = OpAMD64VGF2P8MULB128
                return true
@@ -2318,66 +1874,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpGreaterEqualInt64x8(v)
        case OpGreaterEqualInt8x64:
                return rewriteValueAMD64_OpGreaterEqualInt8x64(v)
-       case OpGreaterEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v)
-       case OpGreaterEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v)
-       case OpGreaterEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v)
-       case OpGreaterEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v)
-       case OpGreaterEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v)
-       case OpGreaterEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v)
-       case OpGreaterEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v)
-       case OpGreaterEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v)
-       case OpGreaterEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v)
-       case OpGreaterEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v)
-       case OpGreaterEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v)
-       case OpGreaterEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v)
-       case OpGreaterEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v)
-       case OpGreaterEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v)
-       case OpGreaterEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v)
-       case OpGreaterEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v)
-       case OpGreaterEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v)
-       case OpGreaterEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v)
-       case OpGreaterEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v)
-       case OpGreaterEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v)
-       case OpGreaterEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v)
-       case OpGreaterEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v)
-       case OpGreaterEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v)
-       case OpGreaterEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v)
-       case OpGreaterEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v)
-       case OpGreaterEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v)
-       case OpGreaterEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v)
-       case OpGreaterEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v)
-       case OpGreaterEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v)
-       case OpGreaterEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v)
        case OpGreaterEqualUint16x32:
                return rewriteValueAMD64_OpGreaterEqualUint16x32(v)
        case OpGreaterEqualUint32x16:
@@ -2430,66 +1926,6 @@ func rewriteValueAMD64(v *Value) bool {
                return true
        case OpGreaterInt8x64:
                return rewriteValueAMD64_OpGreaterInt8x64(v)
-       case OpGreaterMaskedFloat32x16:
-               return rewriteValueAMD64_OpGreaterMaskedFloat32x16(v)
-       case OpGreaterMaskedFloat32x4:
-               return rewriteValueAMD64_OpGreaterMaskedFloat32x4(v)
-       case OpGreaterMaskedFloat32x8:
-               return rewriteValueAMD64_OpGreaterMaskedFloat32x8(v)
-       case OpGreaterMaskedFloat64x2:
-               return rewriteValueAMD64_OpGreaterMaskedFloat64x2(v)
-       case OpGreaterMaskedFloat64x4:
-               return rewriteValueAMD64_OpGreaterMaskedFloat64x4(v)
-       case OpGreaterMaskedFloat64x8:
-               return rewriteValueAMD64_OpGreaterMaskedFloat64x8(v)
-       case OpGreaterMaskedInt16x16:
-               return rewriteValueAMD64_OpGreaterMaskedInt16x16(v)
-       case OpGreaterMaskedInt16x32:
-               return rewriteValueAMD64_OpGreaterMaskedInt16x32(v)
-       case OpGreaterMaskedInt16x8:
-               return rewriteValueAMD64_OpGreaterMaskedInt16x8(v)
-       case OpGreaterMaskedInt32x16:
-               return rewriteValueAMD64_OpGreaterMaskedInt32x16(v)
-       case OpGreaterMaskedInt32x4:
-               return rewriteValueAMD64_OpGreaterMaskedInt32x4(v)
-       case OpGreaterMaskedInt32x8:
-               return rewriteValueAMD64_OpGreaterMaskedInt32x8(v)
-       case OpGreaterMaskedInt64x2:
-               return rewriteValueAMD64_OpGreaterMaskedInt64x2(v)
-       case OpGreaterMaskedInt64x4:
-               return rewriteValueAMD64_OpGreaterMaskedInt64x4(v)
-       case OpGreaterMaskedInt64x8:
-               return rewriteValueAMD64_OpGreaterMaskedInt64x8(v)
-       case OpGreaterMaskedInt8x16:
-               return rewriteValueAMD64_OpGreaterMaskedInt8x16(v)
-       case OpGreaterMaskedInt8x32:
-               return rewriteValueAMD64_OpGreaterMaskedInt8x32(v)
-       case OpGreaterMaskedInt8x64:
-               return rewriteValueAMD64_OpGreaterMaskedInt8x64(v)
-       case OpGreaterMaskedUint16x16:
-               return rewriteValueAMD64_OpGreaterMaskedUint16x16(v)
-       case OpGreaterMaskedUint16x32:
-               return rewriteValueAMD64_OpGreaterMaskedUint16x32(v)
-       case OpGreaterMaskedUint16x8:
-               return rewriteValueAMD64_OpGreaterMaskedUint16x8(v)
-       case OpGreaterMaskedUint32x16:
-               return rewriteValueAMD64_OpGreaterMaskedUint32x16(v)
-       case OpGreaterMaskedUint32x4:
-               return rewriteValueAMD64_OpGreaterMaskedUint32x4(v)
-       case OpGreaterMaskedUint32x8:
-               return rewriteValueAMD64_OpGreaterMaskedUint32x8(v)
-       case OpGreaterMaskedUint64x2:
-               return rewriteValueAMD64_OpGreaterMaskedUint64x2(v)
-       case OpGreaterMaskedUint64x4:
-               return rewriteValueAMD64_OpGreaterMaskedUint64x4(v)
-       case OpGreaterMaskedUint64x8:
-               return rewriteValueAMD64_OpGreaterMaskedUint64x8(v)
-       case OpGreaterMaskedUint8x16:
-               return rewriteValueAMD64_OpGreaterMaskedUint8x16(v)
-       case OpGreaterMaskedUint8x32:
-               return rewriteValueAMD64_OpGreaterMaskedUint8x32(v)
-       case OpGreaterMaskedUint8x64:
-               return rewriteValueAMD64_OpGreaterMaskedUint8x64(v)
        case OpGreaterUint16x32:
                return rewriteValueAMD64_OpGreaterUint16x32(v)
        case OpGreaterUint32x16:
@@ -2529,18 +1965,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpIsNanFloat64x4(v)
        case OpIsNanFloat64x8:
                return rewriteValueAMD64_OpIsNanFloat64x8(v)
-       case OpIsNanMaskedFloat32x16:
-               return rewriteValueAMD64_OpIsNanMaskedFloat32x16(v)
-       case OpIsNanMaskedFloat32x4:
-               return rewriteValueAMD64_OpIsNanMaskedFloat32x4(v)
-       case OpIsNanMaskedFloat32x8:
-               return rewriteValueAMD64_OpIsNanMaskedFloat32x8(v)
-       case OpIsNanMaskedFloat64x2:
-               return rewriteValueAMD64_OpIsNanMaskedFloat64x2(v)
-       case OpIsNanMaskedFloat64x4:
-               return rewriteValueAMD64_OpIsNanMaskedFloat64x4(v)
-       case OpIsNanMaskedFloat64x8:
-               return rewriteValueAMD64_OpIsNanMaskedFloat64x8(v)
        case OpIsNonNil:
                return rewriteValueAMD64_OpIsNonNil(v)
        case OpIsSliceInBounds:
@@ -2605,66 +2029,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpLessEqualInt64x8(v)
        case OpLessEqualInt8x64:
                return rewriteValueAMD64_OpLessEqualInt8x64(v)
-       case OpLessEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v)
-       case OpLessEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v)
-       case OpLessEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v)
-       case OpLessEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v)
-       case OpLessEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v)
-       case OpLessEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v)
-       case OpLessEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpLessEqualMaskedInt16x16(v)
-       case OpLessEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpLessEqualMaskedInt16x32(v)
-       case OpLessEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpLessEqualMaskedInt16x8(v)
-       case OpLessEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpLessEqualMaskedInt32x16(v)
-       case OpLessEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpLessEqualMaskedInt32x4(v)
-       case OpLessEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpLessEqualMaskedInt32x8(v)
-       case OpLessEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpLessEqualMaskedInt64x2(v)
-       case OpLessEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpLessEqualMaskedInt64x4(v)
-       case OpLessEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpLessEqualMaskedInt64x8(v)
-       case OpLessEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpLessEqualMaskedInt8x16(v)
-       case OpLessEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpLessEqualMaskedInt8x32(v)
-       case OpLessEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpLessEqualMaskedInt8x64(v)
-       case OpLessEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpLessEqualMaskedUint16x16(v)
-       case OpLessEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpLessEqualMaskedUint16x32(v)
-       case OpLessEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpLessEqualMaskedUint16x8(v)
-       case OpLessEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpLessEqualMaskedUint32x16(v)
-       case OpLessEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpLessEqualMaskedUint32x4(v)
-       case OpLessEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpLessEqualMaskedUint32x8(v)
-       case OpLessEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpLessEqualMaskedUint64x2(v)
-       case OpLessEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpLessEqualMaskedUint64x4(v)
-       case OpLessEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpLessEqualMaskedUint64x8(v)
-       case OpLessEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpLessEqualMaskedUint8x16(v)
-       case OpLessEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpLessEqualMaskedUint8x32(v)
-       case OpLessEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpLessEqualMaskedUint8x64(v)
        case OpLessEqualUint16x32:
                return rewriteValueAMD64_OpLessEqualUint16x32(v)
        case OpLessEqualUint32x16:
@@ -2693,66 +2057,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpLessInt64x8(v)
        case OpLessInt8x64:
                return rewriteValueAMD64_OpLessInt8x64(v)
-       case OpLessMaskedFloat32x16:
-               return rewriteValueAMD64_OpLessMaskedFloat32x16(v)
-       case OpLessMaskedFloat32x4:
-               return rewriteValueAMD64_OpLessMaskedFloat32x4(v)
-       case OpLessMaskedFloat32x8:
-               return rewriteValueAMD64_OpLessMaskedFloat32x8(v)
-       case OpLessMaskedFloat64x2:
-               return rewriteValueAMD64_OpLessMaskedFloat64x2(v)
-       case OpLessMaskedFloat64x4:
-               return rewriteValueAMD64_OpLessMaskedFloat64x4(v)
-       case OpLessMaskedFloat64x8:
-               return rewriteValueAMD64_OpLessMaskedFloat64x8(v)
-       case OpLessMaskedInt16x16:
-               return rewriteValueAMD64_OpLessMaskedInt16x16(v)
-       case OpLessMaskedInt16x32:
-               return rewriteValueAMD64_OpLessMaskedInt16x32(v)
-       case OpLessMaskedInt16x8:
-               return rewriteValueAMD64_OpLessMaskedInt16x8(v)
-       case OpLessMaskedInt32x16:
-               return rewriteValueAMD64_OpLessMaskedInt32x16(v)
-       case OpLessMaskedInt32x4:
-               return rewriteValueAMD64_OpLessMaskedInt32x4(v)
-       case OpLessMaskedInt32x8:
-               return rewriteValueAMD64_OpLessMaskedInt32x8(v)
-       case OpLessMaskedInt64x2:
-               return rewriteValueAMD64_OpLessMaskedInt64x2(v)
-       case OpLessMaskedInt64x4:
-               return rewriteValueAMD64_OpLessMaskedInt64x4(v)
-       case OpLessMaskedInt64x8:
-               return rewriteValueAMD64_OpLessMaskedInt64x8(v)
-       case OpLessMaskedInt8x16:
-               return rewriteValueAMD64_OpLessMaskedInt8x16(v)
-       case OpLessMaskedInt8x32:
-               return rewriteValueAMD64_OpLessMaskedInt8x32(v)
-       case OpLessMaskedInt8x64:
-               return rewriteValueAMD64_OpLessMaskedInt8x64(v)
-       case OpLessMaskedUint16x16:
-               return rewriteValueAMD64_OpLessMaskedUint16x16(v)
-       case OpLessMaskedUint16x32:
-               return rewriteValueAMD64_OpLessMaskedUint16x32(v)
-       case OpLessMaskedUint16x8:
-               return rewriteValueAMD64_OpLessMaskedUint16x8(v)
-       case OpLessMaskedUint32x16:
-               return rewriteValueAMD64_OpLessMaskedUint32x16(v)
-       case OpLessMaskedUint32x4:
-               return rewriteValueAMD64_OpLessMaskedUint32x4(v)
-       case OpLessMaskedUint32x8:
-               return rewriteValueAMD64_OpLessMaskedUint32x8(v)
-       case OpLessMaskedUint64x2:
-               return rewriteValueAMD64_OpLessMaskedUint64x2(v)
-       case OpLessMaskedUint64x4:
-               return rewriteValueAMD64_OpLessMaskedUint64x4(v)
-       case OpLessMaskedUint64x8:
-               return rewriteValueAMD64_OpLessMaskedUint64x8(v)
-       case OpLessMaskedUint8x16:
-               return rewriteValueAMD64_OpLessMaskedUint8x16(v)
-       case OpLessMaskedUint8x32:
-               return rewriteValueAMD64_OpLessMaskedUint8x32(v)
-       case OpLessMaskedUint8x64:
-               return rewriteValueAMD64_OpLessMaskedUint8x64(v)
        case OpLessUint16x32:
                return rewriteValueAMD64_OpLessUint16x32(v)
        case OpLessUint32x16:
@@ -2887,66 +2191,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMaxInt8x64:
                v.Op = OpAMD64VPMAXSB512
                return true
-       case OpMaxMaskedFloat32x16:
-               return rewriteValueAMD64_OpMaxMaskedFloat32x16(v)
-       case OpMaxMaskedFloat32x4:
-               return rewriteValueAMD64_OpMaxMaskedFloat32x4(v)
-       case OpMaxMaskedFloat32x8:
-               return rewriteValueAMD64_OpMaxMaskedFloat32x8(v)
-       case OpMaxMaskedFloat64x2:
-               return rewriteValueAMD64_OpMaxMaskedFloat64x2(v)
-       case OpMaxMaskedFloat64x4:
-               return rewriteValueAMD64_OpMaxMaskedFloat64x4(v)
-       case OpMaxMaskedFloat64x8:
-               return rewriteValueAMD64_OpMaxMaskedFloat64x8(v)
-       case OpMaxMaskedInt16x16:
-               return rewriteValueAMD64_OpMaxMaskedInt16x16(v)
-       case OpMaxMaskedInt16x32:
-               return rewriteValueAMD64_OpMaxMaskedInt16x32(v)
-       case OpMaxMaskedInt16x8:
-               return rewriteValueAMD64_OpMaxMaskedInt16x8(v)
-       case OpMaxMaskedInt32x16:
-               return rewriteValueAMD64_OpMaxMaskedInt32x16(v)
-       case OpMaxMaskedInt32x4:
-               return rewriteValueAMD64_OpMaxMaskedInt32x4(v)
-       case OpMaxMaskedInt32x8:
-               return rewriteValueAMD64_OpMaxMaskedInt32x8(v)
-       case OpMaxMaskedInt64x2:
-               return rewriteValueAMD64_OpMaxMaskedInt64x2(v)
-       case OpMaxMaskedInt64x4:
-               return rewriteValueAMD64_OpMaxMaskedInt64x4(v)
-       case OpMaxMaskedInt64x8:
-               return rewriteValueAMD64_OpMaxMaskedInt64x8(v)
-       case OpMaxMaskedInt8x16:
-               return rewriteValueAMD64_OpMaxMaskedInt8x16(v)
-       case OpMaxMaskedInt8x32:
-               return rewriteValueAMD64_OpMaxMaskedInt8x32(v)
-       case OpMaxMaskedInt8x64:
-               return rewriteValueAMD64_OpMaxMaskedInt8x64(v)
-       case OpMaxMaskedUint16x16:
-               return rewriteValueAMD64_OpMaxMaskedUint16x16(v)
-       case OpMaxMaskedUint16x32:
-               return rewriteValueAMD64_OpMaxMaskedUint16x32(v)
-       case OpMaxMaskedUint16x8:
-               return rewriteValueAMD64_OpMaxMaskedUint16x8(v)
-       case OpMaxMaskedUint32x16:
-               return rewriteValueAMD64_OpMaxMaskedUint32x16(v)
-       case OpMaxMaskedUint32x4:
-               return rewriteValueAMD64_OpMaxMaskedUint32x4(v)
-       case OpMaxMaskedUint32x8:
-               return rewriteValueAMD64_OpMaxMaskedUint32x8(v)
-       case OpMaxMaskedUint64x2:
-               return rewriteValueAMD64_OpMaxMaskedUint64x2(v)
-       case OpMaxMaskedUint64x4:
-               return rewriteValueAMD64_OpMaxMaskedUint64x4(v)
-       case OpMaxMaskedUint64x8:
-               return rewriteValueAMD64_OpMaxMaskedUint64x8(v)
-       case OpMaxMaskedUint8x16:
-               return rewriteValueAMD64_OpMaxMaskedUint8x16(v)
-       case OpMaxMaskedUint8x32:
-               return rewriteValueAMD64_OpMaxMaskedUint8x32(v)
-       case OpMaxMaskedUint8x64:
-               return rewriteValueAMD64_OpMaxMaskedUint8x64(v)
        case OpMaxUint16x16:
                v.Op = OpAMD64VPMAXUW256
                return true
@@ -3041,66 +2285,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMinInt8x64:
                v.Op = OpAMD64VPMINSB512
                return true
-       case OpMinMaskedFloat32x16:
-               return rewriteValueAMD64_OpMinMaskedFloat32x16(v)
-       case OpMinMaskedFloat32x4:
-               return rewriteValueAMD64_OpMinMaskedFloat32x4(v)
-       case OpMinMaskedFloat32x8:
-               return rewriteValueAMD64_OpMinMaskedFloat32x8(v)
-       case OpMinMaskedFloat64x2:
-               return rewriteValueAMD64_OpMinMaskedFloat64x2(v)
-       case OpMinMaskedFloat64x4:
-               return rewriteValueAMD64_OpMinMaskedFloat64x4(v)
-       case OpMinMaskedFloat64x8:
-               return rewriteValueAMD64_OpMinMaskedFloat64x8(v)
-       case OpMinMaskedInt16x16:
-               return rewriteValueAMD64_OpMinMaskedInt16x16(v)
-       case OpMinMaskedInt16x32:
-               return rewriteValueAMD64_OpMinMaskedInt16x32(v)
-       case OpMinMaskedInt16x8:
-               return rewriteValueAMD64_OpMinMaskedInt16x8(v)
-       case OpMinMaskedInt32x16:
-               return rewriteValueAMD64_OpMinMaskedInt32x16(v)
-       case OpMinMaskedInt32x4:
-               return rewriteValueAMD64_OpMinMaskedInt32x4(v)
-       case OpMinMaskedInt32x8:
-               return rewriteValueAMD64_OpMinMaskedInt32x8(v)
-       case OpMinMaskedInt64x2:
-               return rewriteValueAMD64_OpMinMaskedInt64x2(v)
-       case OpMinMaskedInt64x4:
-               return rewriteValueAMD64_OpMinMaskedInt64x4(v)
-       case OpMinMaskedInt64x8:
-               return rewriteValueAMD64_OpMinMaskedInt64x8(v)
-       case OpMinMaskedInt8x16:
-               return rewriteValueAMD64_OpMinMaskedInt8x16(v)
-       case OpMinMaskedInt8x32:
-               return rewriteValueAMD64_OpMinMaskedInt8x32(v)
-       case OpMinMaskedInt8x64:
-               return rewriteValueAMD64_OpMinMaskedInt8x64(v)
-       case OpMinMaskedUint16x16:
-               return rewriteValueAMD64_OpMinMaskedUint16x16(v)
-       case OpMinMaskedUint16x32:
-               return rewriteValueAMD64_OpMinMaskedUint16x32(v)
-       case OpMinMaskedUint16x8:
-               return rewriteValueAMD64_OpMinMaskedUint16x8(v)
-       case OpMinMaskedUint32x16:
-               return rewriteValueAMD64_OpMinMaskedUint32x16(v)
-       case OpMinMaskedUint32x4:
-               return rewriteValueAMD64_OpMinMaskedUint32x4(v)
-       case OpMinMaskedUint32x8:
-               return rewriteValueAMD64_OpMinMaskedUint32x8(v)
-       case OpMinMaskedUint64x2:
-               return rewriteValueAMD64_OpMinMaskedUint64x2(v)
-       case OpMinMaskedUint64x4:
-               return rewriteValueAMD64_OpMinMaskedUint64x4(v)
-       case OpMinMaskedUint64x8:
-               return rewriteValueAMD64_OpMinMaskedUint64x8(v)
-       case OpMinMaskedUint8x16:
-               return rewriteValueAMD64_OpMinMaskedUint8x16(v)
-       case OpMinMaskedUint8x32:
-               return rewriteValueAMD64_OpMinMaskedUint8x32(v)
-       case OpMinMaskedUint8x64:
-               return rewriteValueAMD64_OpMinMaskedUint8x64(v)
        case OpMinUint16x16:
                v.Op = OpAMD64VPMINUW256
                return true
@@ -3194,18 +2378,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulAddFloat64x8:
                v.Op = OpAMD64VFMADD213PD512
                return true
-       case OpMulAddMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulAddMaskedFloat32x16(v)
-       case OpMulAddMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulAddMaskedFloat32x4(v)
-       case OpMulAddMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulAddMaskedFloat32x8(v)
-       case OpMulAddMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulAddMaskedFloat64x2(v)
-       case OpMulAddMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulAddMaskedFloat64x4(v)
-       case OpMulAddMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulAddMaskedFloat64x8(v)
        case OpMulAddSubFloat32x16:
                v.Op = OpAMD64VFMADDSUB213PS512
                return true
@@ -3224,18 +2396,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulAddSubFloat64x8:
                v.Op = OpAMD64VFMADDSUB213PD512
                return true
-       case OpMulAddSubMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v)
-       case OpMulAddSubMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v)
-       case OpMulAddSubMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v)
-       case OpMulAddSubMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v)
-       case OpMulAddSubMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v)
-       case OpMulAddSubMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v)
        case OpMulEvenWidenInt32x4:
                v.Op = OpAMD64VPMULDQ128
                return true
@@ -3275,18 +2435,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulHighInt16x8:
                v.Op = OpAMD64VPMULHW128
                return true
-       case OpMulHighMaskedInt16x16:
-               return rewriteValueAMD64_OpMulHighMaskedInt16x16(v)
-       case OpMulHighMaskedInt16x32:
-               return rewriteValueAMD64_OpMulHighMaskedInt16x32(v)
-       case OpMulHighMaskedInt16x8:
-               return rewriteValueAMD64_OpMulHighMaskedInt16x8(v)
-       case OpMulHighMaskedUint16x16:
-               return rewriteValueAMD64_OpMulHighMaskedUint16x16(v)
-       case OpMulHighMaskedUint16x32:
-               return rewriteValueAMD64_OpMulHighMaskedUint16x32(v)
-       case OpMulHighMaskedUint16x8:
-               return rewriteValueAMD64_OpMulHighMaskedUint16x8(v)
        case OpMulHighUint16x16:
                v.Op = OpAMD64VPMULHUW256
                return true
@@ -3323,54 +2471,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulInt64x8:
                v.Op = OpAMD64VPMULLQ512
                return true
-       case OpMulMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulMaskedFloat32x16(v)
-       case OpMulMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulMaskedFloat32x4(v)
-       case OpMulMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulMaskedFloat32x8(v)
-       case OpMulMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulMaskedFloat64x2(v)
-       case OpMulMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulMaskedFloat64x4(v)
-       case OpMulMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulMaskedFloat64x8(v)
-       case OpMulMaskedInt16x16:
-               return rewriteValueAMD64_OpMulMaskedInt16x16(v)
-       case OpMulMaskedInt16x32:
-               return rewriteValueAMD64_OpMulMaskedInt16x32(v)
-       case OpMulMaskedInt16x8:
-               return rewriteValueAMD64_OpMulMaskedInt16x8(v)
-       case OpMulMaskedInt32x16:
-               return rewriteValueAMD64_OpMulMaskedInt32x16(v)
-       case OpMulMaskedInt32x4:
-               return rewriteValueAMD64_OpMulMaskedInt32x4(v)
-       case OpMulMaskedInt32x8:
-               return rewriteValueAMD64_OpMulMaskedInt32x8(v)
-       case OpMulMaskedInt64x2:
-               return rewriteValueAMD64_OpMulMaskedInt64x2(v)
-       case OpMulMaskedInt64x4:
-               return rewriteValueAMD64_OpMulMaskedInt64x4(v)
-       case OpMulMaskedInt64x8:
-               return rewriteValueAMD64_OpMulMaskedInt64x8(v)
-       case OpMulMaskedUint16x16:
-               return rewriteValueAMD64_OpMulMaskedUint16x16(v)
-       case OpMulMaskedUint16x32:
-               return rewriteValueAMD64_OpMulMaskedUint16x32(v)
-       case OpMulMaskedUint16x8:
-               return rewriteValueAMD64_OpMulMaskedUint16x8(v)
-       case OpMulMaskedUint32x16:
-               return rewriteValueAMD64_OpMulMaskedUint32x16(v)
-       case OpMulMaskedUint32x4:
-               return rewriteValueAMD64_OpMulMaskedUint32x4(v)
-       case OpMulMaskedUint32x8:
-               return rewriteValueAMD64_OpMulMaskedUint32x8(v)
-       case OpMulMaskedUint64x2:
-               return rewriteValueAMD64_OpMulMaskedUint64x2(v)
-       case OpMulMaskedUint64x4:
-               return rewriteValueAMD64_OpMulMaskedUint64x4(v)
-       case OpMulMaskedUint64x8:
-               return rewriteValueAMD64_OpMulMaskedUint64x8(v)
        case OpMulSubAddFloat32x16:
                v.Op = OpAMD64VFMSUBADD213PS512
                return true
@@ -3389,18 +2489,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulSubAddFloat64x8:
                v.Op = OpAMD64VFMSUBADD213PD512
                return true
-       case OpMulSubAddMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v)
-       case OpMulSubAddMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v)
-       case OpMulSubAddMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v)
-       case OpMulSubAddMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v)
-       case OpMulSubAddMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v)
-       case OpMulSubAddMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v)
        case OpMulUint16x16:
                v.Op = OpAMD64VPMULLW256
                return true
@@ -3485,66 +2573,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpNotEqualInt64x8(v)
        case OpNotEqualInt8x64:
                return rewriteValueAMD64_OpNotEqualInt8x64(v)
-       case OpNotEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v)
-       case OpNotEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v)
-       case OpNotEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v)
-       case OpNotEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v)
-       case OpNotEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v)
-       case OpNotEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v)
-       case OpNotEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpNotEqualMaskedInt16x16(v)
-       case OpNotEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpNotEqualMaskedInt16x32(v)
-       case OpNotEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpNotEqualMaskedInt16x8(v)
-       case OpNotEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpNotEqualMaskedInt32x16(v)
-       case OpNotEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpNotEqualMaskedInt32x4(v)
-       case OpNotEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpNotEqualMaskedInt32x8(v)
-       case OpNotEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpNotEqualMaskedInt64x2(v)
-       case OpNotEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpNotEqualMaskedInt64x4(v)
-       case OpNotEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpNotEqualMaskedInt64x8(v)
-       case OpNotEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpNotEqualMaskedInt8x16(v)
-       case OpNotEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpNotEqualMaskedInt8x32(v)
-       case OpNotEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpNotEqualMaskedInt8x64(v)
-       case OpNotEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpNotEqualMaskedUint16x16(v)
-       case OpNotEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpNotEqualMaskedUint16x32(v)
-       case OpNotEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpNotEqualMaskedUint16x8(v)
-       case OpNotEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpNotEqualMaskedUint32x16(v)
-       case OpNotEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpNotEqualMaskedUint32x4(v)
-       case OpNotEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpNotEqualMaskedUint32x8(v)
-       case OpNotEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpNotEqualMaskedUint64x2(v)
-       case OpNotEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpNotEqualMaskedUint64x4(v)
-       case OpNotEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpNotEqualMaskedUint64x8(v)
-       case OpNotEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpNotEqualMaskedUint8x16(v)
-       case OpNotEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpNotEqualMaskedUint8x32(v)
-       case OpNotEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpNotEqualMaskedUint8x64(v)
        case OpNotEqualUint16x32:
                return rewriteValueAMD64_OpNotEqualUint16x32(v)
        case OpNotEqualUint32x16:
@@ -3591,54 +2619,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpOnesCountInt8x64:
                v.Op = OpAMD64VPOPCNTB512
                return true
-       case OpOnesCountMaskedInt16x16:
-               return rewriteValueAMD64_OpOnesCountMaskedInt16x16(v)
-       case OpOnesCountMaskedInt16x32:
-               return rewriteValueAMD64_OpOnesCountMaskedInt16x32(v)
-       case OpOnesCountMaskedInt16x8:
-               return rewriteValueAMD64_OpOnesCountMaskedInt16x8(v)
-       case OpOnesCountMaskedInt32x16:
-               return rewriteValueAMD64_OpOnesCountMaskedInt32x16(v)
-       case OpOnesCountMaskedInt32x4:
-               return rewriteValueAMD64_OpOnesCountMaskedInt32x4(v)
-       case OpOnesCountMaskedInt32x8:
-               return rewriteValueAMD64_OpOnesCountMaskedInt32x8(v)
-       case OpOnesCountMaskedInt64x2:
-               return rewriteValueAMD64_OpOnesCountMaskedInt64x2(v)
-       case OpOnesCountMaskedInt64x4:
-               return rewriteValueAMD64_OpOnesCountMaskedInt64x4(v)
-       case OpOnesCountMaskedInt64x8:
-               return rewriteValueAMD64_OpOnesCountMaskedInt64x8(v)
-       case OpOnesCountMaskedInt8x16:
-               return rewriteValueAMD64_OpOnesCountMaskedInt8x16(v)
-       case OpOnesCountMaskedInt8x32:
-               return rewriteValueAMD64_OpOnesCountMaskedInt8x32(v)
-       case OpOnesCountMaskedInt8x64:
-               return rewriteValueAMD64_OpOnesCountMaskedInt8x64(v)
-       case OpOnesCountMaskedUint16x16:
-               return rewriteValueAMD64_OpOnesCountMaskedUint16x16(v)
-       case OpOnesCountMaskedUint16x32:
-               return rewriteValueAMD64_OpOnesCountMaskedUint16x32(v)
-       case OpOnesCountMaskedUint16x8:
-               return rewriteValueAMD64_OpOnesCountMaskedUint16x8(v)
-       case OpOnesCountMaskedUint32x16:
-               return rewriteValueAMD64_OpOnesCountMaskedUint32x16(v)
-       case OpOnesCountMaskedUint32x4:
-               return rewriteValueAMD64_OpOnesCountMaskedUint32x4(v)
-       case OpOnesCountMaskedUint32x8:
-               return rewriteValueAMD64_OpOnesCountMaskedUint32x8(v)
-       case OpOnesCountMaskedUint64x2:
-               return rewriteValueAMD64_OpOnesCountMaskedUint64x2(v)
-       case OpOnesCountMaskedUint64x4:
-               return rewriteValueAMD64_OpOnesCountMaskedUint64x4(v)
-       case OpOnesCountMaskedUint64x8:
-               return rewriteValueAMD64_OpOnesCountMaskedUint64x8(v)
-       case OpOnesCountMaskedUint8x16:
-               return rewriteValueAMD64_OpOnesCountMaskedUint8x16(v)
-       case OpOnesCountMaskedUint8x32:
-               return rewriteValueAMD64_OpOnesCountMaskedUint8x32(v)
-       case OpOnesCountMaskedUint8x64:
-               return rewriteValueAMD64_OpOnesCountMaskedUint8x64(v)
        case OpOnesCountUint16x16:
                v.Op = OpAMD64VPOPCNTW256
                return true
@@ -3726,30 +2706,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpOrInt8x64:
                v.Op = OpAMD64VPORD512
                return true
-       case OpOrMaskedInt32x16:
-               return rewriteValueAMD64_OpOrMaskedInt32x16(v)
-       case OpOrMaskedInt32x4:
-               return rewriteValueAMD64_OpOrMaskedInt32x4(v)
-       case OpOrMaskedInt32x8:
-               return rewriteValueAMD64_OpOrMaskedInt32x8(v)
-       case OpOrMaskedInt64x2:
-               return rewriteValueAMD64_OpOrMaskedInt64x2(v)
-       case OpOrMaskedInt64x4:
-               return rewriteValueAMD64_OpOrMaskedInt64x4(v)
-       case OpOrMaskedInt64x8:
-               return rewriteValueAMD64_OpOrMaskedInt64x8(v)
-       case OpOrMaskedUint32x16:
-               return rewriteValueAMD64_OpOrMaskedUint32x16(v)
-       case OpOrMaskedUint32x4:
-               return rewriteValueAMD64_OpOrMaskedUint32x4(v)
-       case OpOrMaskedUint32x8:
-               return rewriteValueAMD64_OpOrMaskedUint32x8(v)
-       case OpOrMaskedUint64x2:
-               return rewriteValueAMD64_OpOrMaskedUint64x2(v)
-       case OpOrMaskedUint64x4:
-               return rewriteValueAMD64_OpOrMaskedUint64x4(v)
-       case OpOrMaskedUint64x8:
-               return rewriteValueAMD64_OpOrMaskedUint64x8(v)
        case OpOrUint16x16:
                v.Op = OpAMD64VPOR256
                return true
@@ -3843,66 +2799,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpPermute2Int8x64:
                v.Op = OpAMD64VPERMI2B512
                return true
-       case OpPermute2MaskedFloat32x16:
-               return rewriteValueAMD64_OpPermute2MaskedFloat32x16(v)
-       case OpPermute2MaskedFloat32x4:
-               return rewriteValueAMD64_OpPermute2MaskedFloat32x4(v)
-       case OpPermute2MaskedFloat32x8:
-               return rewriteValueAMD64_OpPermute2MaskedFloat32x8(v)
-       case OpPermute2MaskedFloat64x2:
-               return rewriteValueAMD64_OpPermute2MaskedFloat64x2(v)
-       case OpPermute2MaskedFloat64x4:
-               return rewriteValueAMD64_OpPermute2MaskedFloat64x4(v)
-       case OpPermute2MaskedFloat64x8:
-               return rewriteValueAMD64_OpPermute2MaskedFloat64x8(v)
-       case OpPermute2MaskedInt16x16:
-               return rewriteValueAMD64_OpPermute2MaskedInt16x16(v)
-       case OpPermute2MaskedInt16x32:
-               return rewriteValueAMD64_OpPermute2MaskedInt16x32(v)
-       case OpPermute2MaskedInt16x8:
-               return rewriteValueAMD64_OpPermute2MaskedInt16x8(v)
-       case OpPermute2MaskedInt32x16:
-               return rewriteValueAMD64_OpPermute2MaskedInt32x16(v)
-       case OpPermute2MaskedInt32x4:
-               return rewriteValueAMD64_OpPermute2MaskedInt32x4(v)
-       case OpPermute2MaskedInt32x8:
-               return rewriteValueAMD64_OpPermute2MaskedInt32x8(v)
-       case OpPermute2MaskedInt64x2:
-               return rewriteValueAMD64_OpPermute2MaskedInt64x2(v)
-       case OpPermute2MaskedInt64x4:
-               return rewriteValueAMD64_OpPermute2MaskedInt64x4(v)
-       case OpPermute2MaskedInt64x8:
-               return rewriteValueAMD64_OpPermute2MaskedInt64x8(v)
-       case OpPermute2MaskedInt8x16:
-               return rewriteValueAMD64_OpPermute2MaskedInt8x16(v)
-       case OpPermute2MaskedInt8x32:
-               return rewriteValueAMD64_OpPermute2MaskedInt8x32(v)
-       case OpPermute2MaskedInt8x64:
-               return rewriteValueAMD64_OpPermute2MaskedInt8x64(v)
-       case OpPermute2MaskedUint16x16:
-               return rewriteValueAMD64_OpPermute2MaskedUint16x16(v)
-       case OpPermute2MaskedUint16x32:
-               return rewriteValueAMD64_OpPermute2MaskedUint16x32(v)
-       case OpPermute2MaskedUint16x8:
-               return rewriteValueAMD64_OpPermute2MaskedUint16x8(v)
-       case OpPermute2MaskedUint32x16:
-               return rewriteValueAMD64_OpPermute2MaskedUint32x16(v)
-       case OpPermute2MaskedUint32x4:
-               return rewriteValueAMD64_OpPermute2MaskedUint32x4(v)
-       case OpPermute2MaskedUint32x8:
-               return rewriteValueAMD64_OpPermute2MaskedUint32x8(v)
-       case OpPermute2MaskedUint64x2:
-               return rewriteValueAMD64_OpPermute2MaskedUint64x2(v)
-       case OpPermute2MaskedUint64x4:
-               return rewriteValueAMD64_OpPermute2MaskedUint64x4(v)
-       case OpPermute2MaskedUint64x8:
-               return rewriteValueAMD64_OpPermute2MaskedUint64x8(v)
-       case OpPermute2MaskedUint8x16:
-               return rewriteValueAMD64_OpPermute2MaskedUint8x16(v)
-       case OpPermute2MaskedUint8x32:
-               return rewriteValueAMD64_OpPermute2MaskedUint8x32(v)
-       case OpPermute2MaskedUint8x64:
-               return rewriteValueAMD64_OpPermute2MaskedUint8x64(v)
        case OpPermute2Uint16x16:
                v.Op = OpAMD64VPERMI2W256
                return true
@@ -3981,54 +2877,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpPermuteInt8x64:
                v.Op = OpAMD64VPERMB512
                return true
-       case OpPermuteMaskedFloat32x16:
-               return rewriteValueAMD64_OpPermuteMaskedFloat32x16(v)
-       case OpPermuteMaskedFloat32x8:
-               return rewriteValueAMD64_OpPermuteMaskedFloat32x8(v)
-       case OpPermuteMaskedFloat64x4:
-               return rewriteValueAMD64_OpPermuteMaskedFloat64x4(v)
-       case OpPermuteMaskedFloat64x8:
-               return rewriteValueAMD64_OpPermuteMaskedFloat64x8(v)
-       case OpPermuteMaskedInt16x16:
-               return rewriteValueAMD64_OpPermuteMaskedInt16x16(v)
-       case OpPermuteMaskedInt16x32:
-               return rewriteValueAMD64_OpPermuteMaskedInt16x32(v)
-       case OpPermuteMaskedInt16x8:
-               return rewriteValueAMD64_OpPermuteMaskedInt16x8(v)
-       case OpPermuteMaskedInt32x16:
-               return rewriteValueAMD64_OpPermuteMaskedInt32x16(v)
-       case OpPermuteMaskedInt32x8:
-               return rewriteValueAMD64_OpPermuteMaskedInt32x8(v)
-       case OpPermuteMaskedInt64x4:
-               return rewriteValueAMD64_OpPermuteMaskedInt64x4(v)
-       case OpPermuteMaskedInt64x8:
-               return rewriteValueAMD64_OpPermuteMaskedInt64x8(v)
-       case OpPermuteMaskedInt8x16:
-               return rewriteValueAMD64_OpPermuteMaskedInt8x16(v)
-       case OpPermuteMaskedInt8x32:
-               return rewriteValueAMD64_OpPermuteMaskedInt8x32(v)
-       case OpPermuteMaskedInt8x64:
-               return rewriteValueAMD64_OpPermuteMaskedInt8x64(v)
-       case OpPermuteMaskedUint16x16:
-               return rewriteValueAMD64_OpPermuteMaskedUint16x16(v)
-       case OpPermuteMaskedUint16x32:
-               return rewriteValueAMD64_OpPermuteMaskedUint16x32(v)
-       case OpPermuteMaskedUint16x8:
-               return rewriteValueAMD64_OpPermuteMaskedUint16x8(v)
-       case OpPermuteMaskedUint32x16:
-               return rewriteValueAMD64_OpPermuteMaskedUint32x16(v)
-       case OpPermuteMaskedUint32x8:
-               return rewriteValueAMD64_OpPermuteMaskedUint32x8(v)
-       case OpPermuteMaskedUint64x4:
-               return rewriteValueAMD64_OpPermuteMaskedUint64x4(v)
-       case OpPermuteMaskedUint64x8:
-               return rewriteValueAMD64_OpPermuteMaskedUint64x8(v)
-       case OpPermuteMaskedUint8x16:
-               return rewriteValueAMD64_OpPermuteMaskedUint8x16(v)
-       case OpPermuteMaskedUint8x32:
-               return rewriteValueAMD64_OpPermuteMaskedUint8x32(v)
-       case OpPermuteMaskedUint8x64:
-               return rewriteValueAMD64_OpPermuteMaskedUint8x64(v)
        case OpPermuteUint16x16:
                v.Op = OpAMD64VPERMW256
                return true
@@ -4093,18 +2941,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpReciprocalFloat64x8:
                v.Op = OpAMD64VRCP14PD512
                return true
-       case OpReciprocalMaskedFloat32x16:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v)
-       case OpReciprocalMaskedFloat32x4:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v)
-       case OpReciprocalMaskedFloat32x8:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v)
-       case OpReciprocalMaskedFloat64x2:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v)
-       case OpReciprocalMaskedFloat64x4:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v)
-       case OpReciprocalMaskedFloat64x8:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v)
        case OpReciprocalSqrtFloat32x16:
                v.Op = OpAMD64VRSQRT14PS512
                return true
@@ -4123,18 +2959,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpReciprocalSqrtFloat64x8:
                v.Op = OpAMD64VRSQRT14PD512
                return true
-       case OpReciprocalSqrtMaskedFloat32x16:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v)
-       case OpReciprocalSqrtMaskedFloat32x4:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v)
-       case OpReciprocalSqrtMaskedFloat32x8:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v)
-       case OpReciprocalSqrtMaskedFloat64x2:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v)
-       case OpReciprocalSqrtMaskedFloat64x4:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v)
-       case OpReciprocalSqrtMaskedFloat64x8:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v)
        case OpRotateAllLeftInt32x16:
                v.Op = OpAMD64VPROLD512
                return true
@@ -4153,30 +2977,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateAllLeftInt64x8:
                v.Op = OpAMD64VPROLQ512
                return true
-       case OpRotateAllLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v)
-       case OpRotateAllLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v)
-       case OpRotateAllLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v)
-       case OpRotateAllLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v)
-       case OpRotateAllLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v)
-       case OpRotateAllLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v)
-       case OpRotateAllLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v)
-       case OpRotateAllLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v)
-       case OpRotateAllLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v)
-       case OpRotateAllLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v)
-       case OpRotateAllLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v)
-       case OpRotateAllLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v)
        case OpRotateAllLeftUint32x16:
                v.Op = OpAMD64VPROLD512
                return true
@@ -4213,30 +3013,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateAllRightInt64x8:
                v.Op = OpAMD64VPRORQ512
                return true
-       case OpRotateAllRightMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v)
-       case OpRotateAllRightMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v)
-       case OpRotateAllRightMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v)
-       case OpRotateAllRightMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v)
-       case OpRotateAllRightMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v)
-       case OpRotateAllRightMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v)
-       case OpRotateAllRightMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v)
-       case OpRotateAllRightMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v)
-       case OpRotateAllRightMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v)
-       case OpRotateAllRightMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v)
-       case OpRotateAllRightMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v)
-       case OpRotateAllRightMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v)
        case OpRotateAllRightUint32x16:
                v.Op = OpAMD64VPRORD512
                return true
@@ -4285,30 +3061,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateLeftInt64x8:
                v.Op = OpAMD64VPROLVQ512
                return true
-       case OpRotateLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v)
-       case OpRotateLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v)
-       case OpRotateLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v)
-       case OpRotateLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v)
-       case OpRotateLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v)
-       case OpRotateLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v)
-       case OpRotateLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v)
-       case OpRotateLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v)
-       case OpRotateLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v)
-       case OpRotateLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v)
-       case OpRotateLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v)
-       case OpRotateLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v)
        case OpRotateLeftUint32x16:
                v.Op = OpAMD64VPROLVD512
                return true
@@ -4345,30 +3097,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateRightInt64x8:
                v.Op = OpAMD64VPRORVQ512
                return true
-       case OpRotateRightMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateRightMaskedInt32x16(v)
-       case OpRotateRightMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateRightMaskedInt32x4(v)
-       case OpRotateRightMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateRightMaskedInt32x8(v)
-       case OpRotateRightMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateRightMaskedInt64x2(v)
-       case OpRotateRightMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateRightMaskedInt64x4(v)
-       case OpRotateRightMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateRightMaskedInt64x8(v)
-       case OpRotateRightMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateRightMaskedUint32x16(v)
-       case OpRotateRightMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateRightMaskedUint32x4(v)
-       case OpRotateRightMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateRightMaskedUint32x8(v)
-       case OpRotateRightMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateRightMaskedUint64x2(v)
-       case OpRotateRightMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateRightMaskedUint64x4(v)
-       case OpRotateRightMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateRightMaskedUint64x8(v)
        case OpRotateRightUint32x16:
                v.Op = OpAMD64VPRORVD512
                return true
@@ -4415,18 +3143,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v)
        case OpRoundToEvenScaledFloat64x8:
                return rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v)
-       case OpRoundToEvenScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v)
-       case OpRoundToEvenScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v)
-       case OpRoundToEvenScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v)
-       case OpRoundToEvenScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v)
-       case OpRoundToEvenScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v)
-       case OpRoundToEvenScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v)
        case OpRoundToEvenScaledResidueFloat32x16:
                return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v)
        case OpRoundToEvenScaledResidueFloat32x4:
@@ -4439,18 +3155,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v)
        case OpRoundToEvenScaledResidueFloat64x8:
                return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v)
-       case OpRoundToEvenScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v)
-       case OpRoundToEvenScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v)
-       case OpRoundToEvenScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v)
-       case OpRoundToEvenScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v)
-       case OpRoundToEvenScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v)
-       case OpRoundToEvenScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v)
        case OpRsh16Ux16:
                return rewriteValueAMD64_OpRsh16Ux16(v)
        case OpRsh16Ux32:
@@ -4533,18 +3237,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpScaleFloat64x8:
                v.Op = OpAMD64VSCALEFPD512
                return true
-       case OpScaleMaskedFloat32x16:
-               return rewriteValueAMD64_OpScaleMaskedFloat32x16(v)
-       case OpScaleMaskedFloat32x4:
-               return rewriteValueAMD64_OpScaleMaskedFloat32x4(v)
-       case OpScaleMaskedFloat32x8:
-               return rewriteValueAMD64_OpScaleMaskedFloat32x8(v)
-       case OpScaleMaskedFloat64x2:
-               return rewriteValueAMD64_OpScaleMaskedFloat64x2(v)
-       case OpScaleMaskedFloat64x4:
-               return rewriteValueAMD64_OpScaleMaskedFloat64x4(v)
-       case OpScaleMaskedFloat64x8:
-               return rewriteValueAMD64_OpScaleMaskedFloat64x8(v)
        case OpSelect0:
                return rewriteValueAMD64_OpSelect0(v)
        case OpSelect1:
@@ -4688,42 +3380,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllLeftConcatInt64x8:
                v.Op = OpAMD64VPSHLDQ512
                return true
-       case OpShiftAllLeftConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v)
-       case OpShiftAllLeftConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v)
-       case OpShiftAllLeftConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v)
-       case OpShiftAllLeftConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v)
-       case OpShiftAllLeftConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v)
-       case OpShiftAllLeftConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v)
-       case OpShiftAllLeftConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v)
-       case OpShiftAllLeftConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v)
-       case OpShiftAllLeftConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v)
-       case OpShiftAllLeftConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v)
-       case OpShiftAllLeftConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v)
-       case OpShiftAllLeftConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v)
-       case OpShiftAllLeftConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v)
-       case OpShiftAllLeftConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v)
-       case OpShiftAllLeftConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v)
-       case OpShiftAllLeftConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v)
-       case OpShiftAllLeftConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v)
-       case OpShiftAllLeftConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v)
        case OpShiftAllLeftConcatUint16x16:
                v.Op = OpAMD64VPSHLDW256
                return true
@@ -4778,42 +3434,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllLeftInt64x8:
                v.Op = OpAMD64VPSLLQ512
                return true
-       case OpShiftAllLeftMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v)
-       case OpShiftAllLeftMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v)
-       case OpShiftAllLeftMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v)
-       case OpShiftAllLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v)
-       case OpShiftAllLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v)
-       case OpShiftAllLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v)
-       case OpShiftAllLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v)
-       case OpShiftAllLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v)
-       case OpShiftAllLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v)
-       case OpShiftAllLeftMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v)
-       case OpShiftAllLeftMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v)
-       case OpShiftAllLeftMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v)
-       case OpShiftAllLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v)
-       case OpShiftAllLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v)
-       case OpShiftAllLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v)
-       case OpShiftAllLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v)
-       case OpShiftAllLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v)
-       case OpShiftAllLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v)
        case OpShiftAllLeftUint16x16:
                v.Op = OpAMD64VPSLLW256
                return true
@@ -4868,42 +3488,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllRightConcatInt64x8:
                v.Op = OpAMD64VPSHRDQ512
                return true
-       case OpShiftAllRightConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v)
-       case OpShiftAllRightConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v)
-       case OpShiftAllRightConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v)
-       case OpShiftAllRightConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v)
-       case OpShiftAllRightConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v)
-       case OpShiftAllRightConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v)
-       case OpShiftAllRightConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v)
-       case OpShiftAllRightConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v)
-       case OpShiftAllRightConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v)
-       case OpShiftAllRightConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v)
-       case OpShiftAllRightConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v)
-       case OpShiftAllRightConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v)
-       case OpShiftAllRightConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v)
-       case OpShiftAllRightConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v)
-       case OpShiftAllRightConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v)
-       case OpShiftAllRightConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v)
-       case OpShiftAllRightConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v)
-       case OpShiftAllRightConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v)
        case OpShiftAllRightConcatUint16x16:
                v.Op = OpAMD64VPSHRDW256
                return true
@@ -4958,42 +3542,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllRightInt64x8:
                v.Op = OpAMD64VPSRAQ512
                return true
-       case OpShiftAllRightMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v)
-       case OpShiftAllRightMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v)
-       case OpShiftAllRightMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v)
-       case OpShiftAllRightMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v)
-       case OpShiftAllRightMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v)
-       case OpShiftAllRightMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v)
-       case OpShiftAllRightMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v)
-       case OpShiftAllRightMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v)
-       case OpShiftAllRightMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v)
-       case OpShiftAllRightMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v)
-       case OpShiftAllRightMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v)
-       case OpShiftAllRightMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v)
-       case OpShiftAllRightMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v)
-       case OpShiftAllRightMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v)
-       case OpShiftAllRightMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v)
-       case OpShiftAllRightMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v)
-       case OpShiftAllRightMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v)
-       case OpShiftAllRightMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v)
        case OpShiftAllRightUint16x16:
                v.Op = OpAMD64VPSRLW256
                return true
@@ -5048,42 +3596,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftLeftConcatInt64x8:
                v.Op = OpAMD64VPSHLDVQ512
                return true
-       case OpShiftLeftConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v)
-       case OpShiftLeftConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v)
-       case OpShiftLeftConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v)
-       case OpShiftLeftConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v)
-       case OpShiftLeftConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v)
-       case OpShiftLeftConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v)
-       case OpShiftLeftConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v)
-       case OpShiftLeftConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v)
-       case OpShiftLeftConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v)
-       case OpShiftLeftConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v)
-       case OpShiftLeftConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v)
-       case OpShiftLeftConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v)
-       case OpShiftLeftConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v)
-       case OpShiftLeftConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v)
-       case OpShiftLeftConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v)
-       case OpShiftLeftConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v)
-       case OpShiftLeftConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v)
-       case OpShiftLeftConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v)
        case OpShiftLeftConcatUint16x16:
                v.Op = OpAMD64VPSHLDVW256
                return true
@@ -5138,42 +3650,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftLeftInt64x8:
                v.Op = OpAMD64VPSLLVQ512
                return true
-       case OpShiftLeftMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v)
-       case OpShiftLeftMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v)
-       case OpShiftLeftMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v)
-       case OpShiftLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v)
-       case OpShiftLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v)
-       case OpShiftLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v)
-       case OpShiftLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v)
-       case OpShiftLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v)
-       case OpShiftLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v)
-       case OpShiftLeftMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v)
-       case OpShiftLeftMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v)
-       case OpShiftLeftMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v)
-       case OpShiftLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v)
-       case OpShiftLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v)
-       case OpShiftLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v)
-       case OpShiftLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v)
-       case OpShiftLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v)
-       case OpShiftLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v)
        case OpShiftLeftUint16x16:
                v.Op = OpAMD64VPSLLVW256
                return true
@@ -5228,42 +3704,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftRightConcatInt64x8:
                v.Op = OpAMD64VPSHRDVQ512
                return true
-       case OpShiftRightConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v)
-       case OpShiftRightConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v)
-       case OpShiftRightConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v)
-       case OpShiftRightConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v)
-       case OpShiftRightConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v)
-       case OpShiftRightConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v)
-       case OpShiftRightConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v)
-       case OpShiftRightConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v)
-       case OpShiftRightConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v)
-       case OpShiftRightConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v)
-       case OpShiftRightConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v)
-       case OpShiftRightConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v)
-       case OpShiftRightConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v)
-       case OpShiftRightConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v)
-       case OpShiftRightConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v)
-       case OpShiftRightConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v)
-       case OpShiftRightConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v)
-       case OpShiftRightConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v)
        case OpShiftRightConcatUint16x16:
                v.Op = OpAMD64VPSHRDVW256
                return true
@@ -5318,42 +3758,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftRightInt64x8:
                v.Op = OpAMD64VPSRAVQ512
                return true
-       case OpShiftRightMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftRightMaskedInt16x16(v)
-       case OpShiftRightMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftRightMaskedInt16x32(v)
-       case OpShiftRightMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftRightMaskedInt16x8(v)
-       case OpShiftRightMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftRightMaskedInt32x16(v)
-       case OpShiftRightMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftRightMaskedInt32x4(v)
-       case OpShiftRightMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftRightMaskedInt32x8(v)
-       case OpShiftRightMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftRightMaskedInt64x2(v)
-       case OpShiftRightMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftRightMaskedInt64x4(v)
-       case OpShiftRightMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftRightMaskedInt64x8(v)
-       case OpShiftRightMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftRightMaskedUint16x16(v)
-       case OpShiftRightMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftRightMaskedUint16x32(v)
-       case OpShiftRightMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftRightMaskedUint16x8(v)
-       case OpShiftRightMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftRightMaskedUint32x16(v)
-       case OpShiftRightMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftRightMaskedUint32x4(v)
-       case OpShiftRightMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftRightMaskedUint32x8(v)
-       case OpShiftRightMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftRightMaskedUint64x2(v)
-       case OpShiftRightMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftRightMaskedUint64x4(v)
-       case OpShiftRightMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftRightMaskedUint64x8(v)
        case OpShiftRightUint16x16:
                v.Op = OpAMD64VPSRLVW256
                return true
@@ -5429,18 +3833,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSqrtFloat64x8:
                v.Op = OpAMD64VSQRTPD512
                return true
-       case OpSqrtMaskedFloat32x16:
-               return rewriteValueAMD64_OpSqrtMaskedFloat32x16(v)
-       case OpSqrtMaskedFloat32x4:
-               return rewriteValueAMD64_OpSqrtMaskedFloat32x4(v)
-       case OpSqrtMaskedFloat32x8:
-               return rewriteValueAMD64_OpSqrtMaskedFloat32x8(v)
-       case OpSqrtMaskedFloat64x2:
-               return rewriteValueAMD64_OpSqrtMaskedFloat64x2(v)
-       case OpSqrtMaskedFloat64x4:
-               return rewriteValueAMD64_OpSqrtMaskedFloat64x4(v)
-       case OpSqrtMaskedFloat64x8:
-               return rewriteValueAMD64_OpSqrtMaskedFloat64x8(v)
        case OpStaticCall:
                v.Op = OpAMD64CALLstatic
                return true
@@ -5550,66 +3942,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSubInt8x64:
                v.Op = OpAMD64VPSUBB512
                return true
-       case OpSubMaskedFloat32x16:
-               return rewriteValueAMD64_OpSubMaskedFloat32x16(v)
-       case OpSubMaskedFloat32x4:
-               return rewriteValueAMD64_OpSubMaskedFloat32x4(v)
-       case OpSubMaskedFloat32x8:
-               return rewriteValueAMD64_OpSubMaskedFloat32x8(v)
-       case OpSubMaskedFloat64x2:
-               return rewriteValueAMD64_OpSubMaskedFloat64x2(v)
-       case OpSubMaskedFloat64x4:
-               return rewriteValueAMD64_OpSubMaskedFloat64x4(v)
-       case OpSubMaskedFloat64x8:
-               return rewriteValueAMD64_OpSubMaskedFloat64x8(v)
-       case OpSubMaskedInt16x16:
-               return rewriteValueAMD64_OpSubMaskedInt16x16(v)
-       case OpSubMaskedInt16x32:
-               return rewriteValueAMD64_OpSubMaskedInt16x32(v)
-       case OpSubMaskedInt16x8:
-               return rewriteValueAMD64_OpSubMaskedInt16x8(v)
-       case OpSubMaskedInt32x16:
-               return rewriteValueAMD64_OpSubMaskedInt32x16(v)
-       case OpSubMaskedInt32x4:
-               return rewriteValueAMD64_OpSubMaskedInt32x4(v)
-       case OpSubMaskedInt32x8:
-               return rewriteValueAMD64_OpSubMaskedInt32x8(v)
-       case OpSubMaskedInt64x2:
-               return rewriteValueAMD64_OpSubMaskedInt64x2(v)
-       case OpSubMaskedInt64x4:
-               return rewriteValueAMD64_OpSubMaskedInt64x4(v)
-       case OpSubMaskedInt64x8:
-               return rewriteValueAMD64_OpSubMaskedInt64x8(v)
-       case OpSubMaskedInt8x16:
-               return rewriteValueAMD64_OpSubMaskedInt8x16(v)
-       case OpSubMaskedInt8x32:
-               return rewriteValueAMD64_OpSubMaskedInt8x32(v)
-       case OpSubMaskedInt8x64:
-               return rewriteValueAMD64_OpSubMaskedInt8x64(v)
-       case OpSubMaskedUint16x16:
-               return rewriteValueAMD64_OpSubMaskedUint16x16(v)
-       case OpSubMaskedUint16x32:
-               return rewriteValueAMD64_OpSubMaskedUint16x32(v)
-       case OpSubMaskedUint16x8:
-               return rewriteValueAMD64_OpSubMaskedUint16x8(v)
-       case OpSubMaskedUint32x16:
-               return rewriteValueAMD64_OpSubMaskedUint32x16(v)
-       case OpSubMaskedUint32x4:
-               return rewriteValueAMD64_OpSubMaskedUint32x4(v)
-       case OpSubMaskedUint32x8:
-               return rewriteValueAMD64_OpSubMaskedUint32x8(v)
-       case OpSubMaskedUint64x2:
-               return rewriteValueAMD64_OpSubMaskedUint64x2(v)
-       case OpSubMaskedUint64x4:
-               return rewriteValueAMD64_OpSubMaskedUint64x4(v)
-       case OpSubMaskedUint64x8:
-               return rewriteValueAMD64_OpSubMaskedUint64x8(v)
-       case OpSubMaskedUint8x16:
-               return rewriteValueAMD64_OpSubMaskedUint8x16(v)
-       case OpSubMaskedUint8x32:
-               return rewriteValueAMD64_OpSubMaskedUint8x32(v)
-       case OpSubMaskedUint8x64:
-               return rewriteValueAMD64_OpSubMaskedUint8x64(v)
        case OpSubPairsFloat32x4:
                v.Op = OpAMD64VHSUBPS128
                return true
@@ -5673,30 +4005,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSubSaturatedInt8x64:
                v.Op = OpAMD64VPSUBSB512
                return true
-       case OpSubSaturatedMaskedInt16x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v)
-       case OpSubSaturatedMaskedInt16x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v)
-       case OpSubSaturatedMaskedInt16x8:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v)
-       case OpSubSaturatedMaskedInt8x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v)
-       case OpSubSaturatedMaskedInt8x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v)
-       case OpSubSaturatedMaskedInt8x64:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v)
-       case OpSubSaturatedMaskedUint16x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v)
-       case OpSubSaturatedMaskedUint16x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v)
-       case OpSubSaturatedMaskedUint16x8:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v)
-       case OpSubSaturatedMaskedUint8x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v)
-       case OpSubSaturatedMaskedUint8x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v)
-       case OpSubSaturatedMaskedUint8x64:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v)
        case OpSubSaturatedUint16x16:
                v.Op = OpAMD64VPSUBUSW256
                return true
@@ -5794,18 +4102,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpTruncScaledFloat64x4(v)
        case OpTruncScaledFloat64x8:
                return rewriteValueAMD64_OpTruncScaledFloat64x8(v)
-       case OpTruncScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v)
-       case OpTruncScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v)
-       case OpTruncScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v)
-       case OpTruncScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v)
-       case OpTruncScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v)
-       case OpTruncScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v)
        case OpTruncScaledResidueFloat32x16:
                return rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v)
        case OpTruncScaledResidueFloat32x4:
@@ -5818,18 +4114,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v)
        case OpTruncScaledResidueFloat64x8:
                return rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v)
-       case OpTruncScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v)
-       case OpTruncScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v)
-       case OpTruncScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v)
-       case OpTruncScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v)
-       case OpTruncScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v)
-       case OpTruncScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v)
        case OpWB:
                v.Op = OpAMD64LoweredWB
                return true
@@ -5881,30 +4165,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpXorInt8x64:
                v.Op = OpAMD64VPXORD512
                return true
-       case OpXorMaskedInt32x16:
-               return rewriteValueAMD64_OpXorMaskedInt32x16(v)
-       case OpXorMaskedInt32x4:
-               return rewriteValueAMD64_OpXorMaskedInt32x4(v)
-       case OpXorMaskedInt32x8:
-               return rewriteValueAMD64_OpXorMaskedInt32x8(v)
-       case OpXorMaskedInt64x2:
-               return rewriteValueAMD64_OpXorMaskedInt64x2(v)
-       case OpXorMaskedInt64x4:
-               return rewriteValueAMD64_OpXorMaskedInt64x4(v)
-       case OpXorMaskedInt64x8:
-               return rewriteValueAMD64_OpXorMaskedInt64x8(v)
-       case OpXorMaskedUint32x16:
-               return rewriteValueAMD64_OpXorMaskedUint32x16(v)
-       case OpXorMaskedUint32x4:
-               return rewriteValueAMD64_OpXorMaskedUint32x4(v)
-       case OpXorMaskedUint32x8:
-               return rewriteValueAMD64_OpXorMaskedUint32x8(v)
-       case OpXorMaskedUint64x2:
-               return rewriteValueAMD64_OpXorMaskedUint64x2(v)
-       case OpXorMaskedUint64x4:
-               return rewriteValueAMD64_OpXorMaskedUint64x4(v)
-       case OpXorMaskedUint64x8:
-               return rewriteValueAMD64_OpXorMaskedUint64x8(v)
        case OpXorUint16x16:
                v.Op = OpAMD64VPXOR256
                return true
@@ -27893,66 +26153,6 @@ func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLDMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLDMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLDMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28007,66 +26207,6 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLQMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLQMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLQMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28121,66 +26261,6 @@ func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLWMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLWMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLWMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28235,66 +26315,6 @@ func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRADMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRADMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRADMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28349,66 +26369,6 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAQMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAQMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAQMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28463,66 +26423,6 @@ func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAWMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAWMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAWMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -29423,27011 +27323,11273 @@ func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAbsMaskedInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpAddr(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt16x16 x mask)
-       // result: (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Addr {sym} base)
+       // result: (LEAQ {sym} base)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               sym := auxToSym(v.Aux)
+               base := v_0
+               v.reset(OpAMD64LEAQ)
+               v.Aux = symToAux(sym)
+               v.AddArg(base)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AbsMaskedInt16x32 x mask)
-       // result: (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicAdd32 ptr val mem)
+       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64AddTupleFirst32)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg2(val, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AbsMaskedInt16x8 x mask)
-       // result: (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicAdd64 ptr val mem)
+       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64AddTupleFirst64)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg2(val, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt32x16 x mask)
-       // result: (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicAnd32 ptr val mem)
+       // result: (ANDLlock ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ANDLlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt32x4 x mask)
-       // result: (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicAnd32value ptr val mem)
+       // result: (LoweredAtomicAnd32 ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicAnd32)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt32x8 x mask)
-       // result: (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (AtomicAnd64value ptr val mem)
+       // result: (LoweredAtomicAnd64 ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicAnd64)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt64x2 x mask)
-       // result: (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (AtomicAnd8 ptr val mem)
+       // result: (ANDBlock ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ANDBlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt64x4 x mask)
-       // result: (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
+       // result: (CMPXCHGLlock ptr old new_ mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               old := v_1
+               new_ := v_2
+               mem := v_3
+               v.reset(OpAMD64CMPXCHGLlock)
+               v.AddArg4(ptr, old, new_, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt64x8 x mask)
-       // result: (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
+       // result: (CMPXCHGQlock ptr old new_ mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               old := v_1
+               new_ := v_2
+               mem := v_3
+               v.reset(OpAMD64CMPXCHGQlock)
+               v.AddArg4(ptr, old, new_, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt8x16(v *Value) bool {
+func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt8x16 x mask)
-       // result: (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (AtomicExchange32 ptr val mem)
+       // result: (XCHGL val ptr mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64XCHGL)
+               v.AddArg3(val, ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt8x32 x mask)
-       // result: (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (AtomicExchange64 ptr val mem)
+       // result: (XCHGQ val ptr mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64XCHGQ)
+               v.AddArg3(val, ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt8x64 x mask)
-       // result: (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (AtomicExchange8 ptr val mem)
+       // result: (XCHGB val ptr mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64XCHGB)
+               v.AddArg3(val, ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdPairsSaturatedMaskedInt32x16 x y z mask)
-       // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicLoad32 ptr mem)
+       // result: (MOVLatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPWSSDSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdPairsSaturatedMaskedInt32x4 x y z mask)
-       // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicLoad64 ptr mem)
+       // result: (MOVQatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPWSSDSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdPairsSaturatedMaskedInt32x8 x y z mask)
-       // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (AtomicLoad8 ptr mem)
+       // result: (MOVBatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPWSSDSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVBatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleMaskedInt32x16 x y z mask)
-       // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicLoadPtr ptr mem)
+       // result: (MOVQatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleMaskedInt32x4 x y z mask)
-       // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicOr32 ptr val mem)
+       // result: (ORLlock ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ORLlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleMaskedInt32x8 x y z mask)
-       // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (AtomicOr32value ptr val mem)
+       // result: (LoweredAtomicOr32 ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicOr32)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask)
-       // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicOr64value ptr val mem)
+       // result: (LoweredAtomicOr64 ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicOr64)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask)
-       // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicOr8 ptr val mem)
+       // result: (ORBlock ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ORBlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicStore32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask)
-       // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStore32 ptr val mem)
+       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpAtomicStore64(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat32x16 x y mask)
-       // result: (VADDPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStore64 ptr val mem)
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpAtomicStore8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat32x4 x y mask)
-       // result: (VADDPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStore8 ptr val mem)
+       // result: (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat32x8 x y mask)
-       // result: (VADDPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat64x2 x y mask)
-       // result: (VADDPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen16 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSRL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpAddMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedFloat64x4 x y mask)
-       // result: (VADDPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (BitLen16 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVWQZX <x.Type> x))))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-32)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type)
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat64x8 x y mask)
-       // result: (VADDPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen32 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64)
+               v1.AuxInt = int32ToAuxInt(1)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v2.AddArg(x)
+               v1.AddArg2(v2, v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpAddMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt16x16 x y mask)
-       // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (BitLen32 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL x)))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-32)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedInt16x32 x y mask)
-       // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen64 <t> x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = int32ToAuxInt(1)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
+               v1 := b.NewValue0(v.Pos, OpSelect0, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
+               v3.AuxInt = int64ToAuxInt(-1)
+               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v4.AddArg(v2)
+               v0.AddArg3(v1, v3, v4)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpAddMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt16x8 x y mask)
-       // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (BitLen64 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-64] (LZCNTQ x)))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-64)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedInt32x16 x y mask)
-       // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen8 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSRL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (BitLen8 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x))))
+       for {
+               t := v.Type
+               x := v_0
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-32)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type)
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBswap16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt32x4 x y mask)
-       // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Bswap16 x)
+       // result: (ROLWconst [8] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = int8ToAuxInt(8)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeil(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt32x8 x y mask)
-       // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Ceil x)
+       // result: (ROUNDSD [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = int8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt64x2 x y mask)
-       // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CeilFloat32x4 x)
+       // result: (VROUNDPS128 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt64x4 x y mask)
-       // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CeilFloat32x8 x)
+       // result: (VROUNDPS256 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt64x8 x y mask)
-       // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CeilFloat64x2 x)
+       // result: (VROUNDPD128 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt8x16 x y mask)
-       // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CeilFloat64x4 x)
+       // result: (VROUNDPD256 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt8x32 x y mask)
-       // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt8x64 x y mask)
-       // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint16x16 x y mask)
-       // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint16x32 x y mask)
-       // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint16x8 x y mask)
-       // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint32x16 x y mask)
-       // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint32x4 x y mask)
-       // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CeilScaledResidueFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint32x8 x y mask)
-       // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CeilScaledResidueFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint64x2 x y mask)
-       // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CompressFloat32x16 x mask)
+       // result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPSMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint64x4 x y mask)
-       // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CompressFloat32x4 x mask)
+       // result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPSMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint64x8 x y mask)
-       // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CompressFloat32x8 x mask)
+       // result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPSMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint8x16 x y mask)
-       // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CompressFloat64x2 x mask)
+       // result: (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint8x32 x y mask)
-       // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CompressFloat64x4 x mask)
+       // result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint8x64 x y mask)
-       // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CompressFloat64x8 x mask)
+       // result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt16x16 x y mask)
-       // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CompressInt16x16 x mask)
+       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSWMasked256)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt16x32 x y mask)
-       // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CompressInt16x32 x mask)
+       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSWMasked512)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt16x8 x y mask)
-       // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CompressInt16x8 x mask)
+       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSWMasked128)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt8x16 x y mask)
-       // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CompressInt32x16 x mask)
+       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt8x32 x y mask)
-       // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CompressInt32x4 x mask)
+       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt8x64 x y mask)
-       // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CompressInt32x8 x mask)
+       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint16x16 x y mask)
-       // result: (VPADDUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CompressInt64x2 x mask)
+       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint16x32 x y mask)
-       // result: (VPADDUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CompressInt64x4 x mask)
+       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint16x8 x y mask)
-       // result: (VPADDUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CompressInt64x8 x mask)
+       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint8x16 x y mask)
-       // result: (VPADDUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CompressInt8x16 x mask)
+       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSBMasked128)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint8x32 x y mask)
-       // result: (VPADDUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CompressInt8x32 x mask)
+       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSBMasked256)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint8x64 x y mask)
-       // result: (VPADDUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
+       // match: (CompressInt8x64 x mask)
+       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+       for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSBMasked512)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAddr(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Addr {sym} base)
-       // result: (LEAQ {sym} base)
-       for {
-               sym := auxToSym(v.Aux)
-               base := v_0
-               v.reset(OpAMD64LEAQ)
-               v.Aux = symToAux(sym)
-               v.AddArg(base)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt32x16 x y mask)
-       // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CompressUint16x16 x mask)
+       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt32x4 x y mask)
-       // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CompressUint16x32 x mask)
+       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt32x8 x y mask)
-       // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CompressUint16x8 x mask)
+       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt64x2 x y mask)
-       // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CompressUint32x16 x mask)
+       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt64x4 x y mask)
-       // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CompressUint32x4 x mask)
+       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt64x8 x y mask)
-       // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CompressUint32x8 x mask)
+       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint32x16 x y mask)
-       // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CompressUint64x2 x mask)
+       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint32x4 x y mask)
-       // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CompressUint64x4 x mask)
+       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint32x8 x y mask)
-       // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CompressUint64x8 x mask)
+       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint64x2 x y mask)
-       // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CompressUint8x16 x mask)
+       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint64x4 x y mask)
-       // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CompressUint8x32 x mask)
+       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint64x8 x y mask)
-       // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CompressUint8x64 x mask)
+       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndNotMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpCondSelect(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndNotMaskedInt32x16 x y mask)
-       // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQ y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQ)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt32x4 x y mask)
-       // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt32x8 x y mask)
-       // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt64x2 x y mask)
-       // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETG {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt64x4 x y mask)
-       // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETLE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt64x8 x y mask)
-       // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint32x16 x y mask)
-       // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQHI y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQHI)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint32x4 x y mask)
-       // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCS y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQCS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint32x8 x y mask)
-       // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCC y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQCC)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint64x2 x y mask)
-       // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLS y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint64x4 x y mask)
-       // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint64x8 x y mask)
-       // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNEF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicAdd32 ptr val mem)
-       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGTF y x cond)
        for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64AddTupleFirst32)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg2(val, v0)
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGTF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicAdd64 ptr val mem)
-       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGEF y x cond)
        for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64AddTupleFirst64)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg2(val, v0)
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd32 ptr val mem)
-       // result: (ANDLlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ANDLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd32value ptr val mem)
-       // result: (LoweredAtomicAnd32 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicAnd32)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd64value ptr val mem)
-       // result: (LoweredAtomicAnd64 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicAnd64)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd8 ptr val mem)
-       // result: (ANDBlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ANDBlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
-       // result: (CMPXCHGLlock ptr old new_ mem)
-       for {
-               ptr := v_0
-               old := v_1
-               new_ := v_2
-               mem := v_3
-               v.reset(OpAMD64CMPXCHGLlock)
-               v.AddArg4(ptr, old, new_, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
-       // result: (CMPXCHGQlock ptr old new_ mem)
-       for {
-               ptr := v_0
-               old := v_1
-               new_ := v_2
-               mem := v_3
-               v.reset(OpAMD64CMPXCHGQlock)
-               v.AddArg4(ptr, old, new_, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicExchange32 ptr val mem)
-       // result: (XCHGL val ptr mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64XCHGL)
-               v.AddArg3(val, ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicExchange64 ptr val mem)
-       // result: (XCHGQ val ptr mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64XCHGQ)
-               v.AddArg3(val, ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicExchange8 ptr val mem)
-       // result: (XCHGB val ptr mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64XCHGB)
-               v.AddArg3(val, ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoad32 ptr mem)
-       // result: (MOVLatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVLatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoad64 ptr mem)
-       // result: (MOVQatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoad8 ptr mem)
-       // result: (MOVBatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVBatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoadPtr ptr mem)
-       // result: (MOVQatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr32 ptr val mem)
-       // result: (ORLlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ORLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr32value ptr val mem)
-       // result: (LoweredAtomicOr32 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicOr32)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr64value ptr val mem)
-       // result: (LoweredAtomicOr64 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicOr64)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr8 ptr val mem)
-       // result: (ORBlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ORBlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStore32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStore32 ptr val mem)
-       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStore64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStore64 ptr val mem)
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStore8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStore8 ptr val mem)
-       // result: (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStorePtrNoWB ptr val mem)
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAverageMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint16x16 x y mask)
-       // result: (VPAVGWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQ y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQ)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint16x32 x y mask)
-       // result: (VPAVGWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint16x8 x y mask)
-       // result: (VPAVGWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint8x16 x y mask)
-       // result: (VPAVGBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETG {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint8x32 x y mask)
-       // result: (VPAVGBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETLE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint8x64 x y mask)
-       // result: (VPAVGBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBitLen16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen16 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
+               if v_2.Op != OpAMD64SETGE {
                        break
                }
-               v.reset(OpAMD64BSRL)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
-               v1.AddArg(x)
-               v0.AddArg2(v1, v1)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGE)
+               v.AddArg3(y, x, cond)
                return true
        }
-       // match: (BitLen16 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVWQZX <x.Type> x))))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLHI y x cond)
        for {
                t := v.Type
                x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
+               y := v_1
+               if v_2.Op != OpAMD64SETA {
                        break
                }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-32)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type)
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBitLen32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen32 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
                        break
                }
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64)
-               v1.AuxInt = int32ToAuxInt(1)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
-               v2.AddArg(x)
-               v1.AddArg2(v2, v2)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               v.reset(OpAMD64CMOVLHI)
+               v.AddArg3(y, x, cond)
                return true
        }
-       // match: (BitLen32 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL x)))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCS y x cond)
        for {
                t := v.Type
                x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
+               y := v_1
+               if v_2.Op != OpAMD64SETB {
                        break
                }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-32)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBitLen64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen64 <t> x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = int32ToAuxInt(1)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
-               v1 := b.NewValue0(v.Pos, OpSelect0, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
-               v3.AuxInt = int64ToAuxInt(-1)
-               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4.AddArg(v2)
-               v0.AddArg3(v1, v3, v4)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (BitLen64 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-64] (LZCNTQ x)))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-64)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBitLen8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen8 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSRL)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
-               v1.AddArg(x)
-               v0.AddArg2(v1, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (BitLen8 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x))))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
                        break
                }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-32)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type)
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedFloat32x4 x mask)
-       // result: (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedFloat64x2 x mask)
-       // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt16x8 x mask)
-       // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt32x4 x mask)
-       // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt64x2 x mask)
-       // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt8x16 x mask)
-       // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint16x8 x mask)
-       // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint32x4 x mask)
-       // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint64x2 x mask)
-       // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint8x16 x mask)
-       // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedFloat32x4 x mask)
-       // result: (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedFloat64x2 x mask)
-       // result: (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt16x8 x mask)
-       // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt32x4 x mask)
-       // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt64x2 x mask)
-       // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt8x16 x mask)
-       // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint16x8 x mask)
-       // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint32x4 x mask)
-       // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint64x2 x mask)
-       // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint8x16 x mask)
-       // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedFloat32x4 x mask)
-       // result: (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedFloat64x2 x mask)
-       // result: (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt16x8 x mask)
-       // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt32x4 x mask)
-       // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt64x2 x mask)
-       // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt8x16 x mask)
-       // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint16x8 x mask)
-       // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint32x4 x mask)
-       // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint64x2 x mask)
-       // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint8x16 x mask)
-       // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBswap16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Bswap16 x)
-       // result: (ROLWconst [8] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = int8ToAuxInt(8)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeil(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ceil x)
-       // result: (ROUNDSD [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = int8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat32x4 x)
-       // result: (VROUNDPS128 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat32x8 x)
-       // result: (VROUNDPS256 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat64x2 x)
-       // result: (VROUNDPD128 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat64x4 x)
-       // result: (VROUNDPD256 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat32x16 [a] x)
-       // result: (VRNDSCALEPS512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat32x4 [a] x)
-       // result: (VRNDSCALEPS128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat32x8 [a] x)
-       // result: (VRNDSCALEPS256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat64x2 [a] x)
-       // result: (VRNDSCALEPD128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat64x4 [a] x)
-       // result: (VRNDSCALEPD256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat64x8 [a] x)
-       // result: (VRNDSCALEPD512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat32x16 [a] x)
-       // result: (VREDUCEPS512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat32x4 [a] x)
-       // result: (VREDUCEPS128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat32x8 [a] x)
-       // result: (VREDUCEPS256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat64x2 [a] x)
-       // result: (VREDUCEPD128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat64x4 [a] x)
-       // result: (VREDUCEPD256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat64x8 [a] x)
-       // result: (VREDUCEPD512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat32x16 x mask)
-       // result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat32x4 x mask)
-       // result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat32x8 x mask)
-       // result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat64x2 x mask)
-       // result: (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat64x4 x mask)
-       // result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat64x8 x mask)
-       // result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt16x16 x mask)
-       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt16x32 x mask)
-       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt16x8 x mask)
-       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt32x16 x mask)
-       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt32x4 x mask)
-       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt32x8 x mask)
-       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt64x2 x mask)
-       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt64x4 x mask)
-       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt64x8 x mask)
-       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt8x16 x mask)
-       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt8x32 x mask)
-       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt8x64 x mask)
-       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint16x16 x mask)
-       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint16x32 x mask)
-       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint16x8 x mask)
-       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint32x16 x mask)
-       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint32x4 x mask)
-       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint32x8 x mask)
-       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint64x2 x mask)
-       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint64x4 x mask)
-       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint64x8 x mask)
-       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint8x16 x mask)
-       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint8x32 x mask)
-       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint8x64 x mask)
-       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCondSelect(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (CondSelect <t> x y (SETEQ cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQEQ y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQ {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQEQ)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQNE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQNE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETL cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQLT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETL {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQLT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETG cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETG {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETLE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQLE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETLE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQLE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETA cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQHI y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETA {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQHI)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETB cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQCS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETB {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQCS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETAE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQCC y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETAE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQCC)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETBE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQLS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETBE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQLS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQEQF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQEQF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNEF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQNEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQNEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGTF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGTF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGEF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQ cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLEQ y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQ {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLEQ)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLNE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLNE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETL cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLLT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETL {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLLT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETG cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETG {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETLE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLLE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETLE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLLE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETA cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLHI y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETA {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLHI)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETB cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLCS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETB {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLCS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETAE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLCC y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETAE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLCC)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETBE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLLS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETBE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLLS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLEQF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLEQF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNEF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLNEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLNEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGTF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGTF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGEF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQ cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWEQ y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQ {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWEQ)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWNE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWNE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETL cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWLT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETL {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWLT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETG cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETG {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETLE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWLE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETLE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWLE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETA cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWHI y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETA {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWHI)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETB cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWCS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETB {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWCS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETAE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWCC y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETAE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWCC)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETBE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWLS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETBE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWLS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWEQF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWEQF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNEF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWNEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWNEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGTF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGTF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGEF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 1
-       // result: (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 1) {
-                       break
-               }
-               v.reset(OpCondSelect)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64)
-               v0.AddArg(check)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 2
-       // result: (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 2) {
-                       break
-               }
-               v.reset(OpCondSelect)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64)
-               v0.AddArg(check)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 4
-       // result: (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 4) {
-                       break
-               }
-               v.reset(OpCondSelect)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
-               v0.AddArg(check)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQNE y x (CMPQconst [0] check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v0.AddArg(check)
-               v.AddArg3(y, x, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
-       // result: (CMOVLNE y x (CMPQconst [0] check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v0.AddArg(check)
-               v.AddArg3(y, x, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
-       // result: (CMOVWNE y x (CMPQconst [0] check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v0.AddArg(check)
-               v.AddArg3(y, x, v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpConst16(v *Value) bool {
-       // match: (Const16 [c])
-       // result: (MOVLconst [int32(c)])
-       for {
-               c := auxIntToInt16(v.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               return true
-       }
-}
-func rewriteValueAMD64_OpConst8(v *Value) bool {
-       // match: (Const8 [c])
-       // result: (MOVLconst [int32(c)])
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               return true
-       }
-}
-func rewriteValueAMD64_OpConstBool(v *Value) bool {
-       // match: (ConstBool [c])
-       // result: (MOVLconst [b2i32(c)])
-       for {
-               c := auxIntToBool(v.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(b2i32(c))
-               return true
-       }
-}
-func rewriteValueAMD64_OpConstNil(v *Value) bool {
-       // match: (ConstNil )
-       // result: (MOVQconst [0])
-       for {
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToInt32MaskedFloat32x16 x mask)
-       // result: (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTTPS2DQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToInt32MaskedFloat32x4 x mask)
-       // result: (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTTPS2DQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToInt32MaskedFloat32x8 x mask)
-       // result: (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTTPS2DQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToUint32MaskedFloat32x16 x mask)
-       // result: (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTPS2UDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToUint32MaskedFloat32x4 x mask)
-       // result: (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTPS2UDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToUint32MaskedFloat32x8 x mask)
-       // result: (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTPS2UDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCtz16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz16 x)
-       // result: (BSFL (ORLconst <typ.UInt32> [1<<16] x))
-       for {
-               x := v_0
-               v.reset(OpAMD64BSFL)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1 << 16)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ctz16NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz16NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSFL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSFL)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz32 x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz32 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64)
-               v1.AuxInt = int8ToAuxInt(32)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ctz32NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz32NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSFL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSFL)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz64 x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTQ x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTQ)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz64 <t> x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQEQ)
-               v0 := b.NewValue0(v.Pos, OpSelect0, t)
-               v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
-               v2.AuxInt = int64ToAuxInt(64)
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz64NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTQ x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTQ)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz64NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (Select0 (BSFQ x))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz8 x)
-       // result: (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
-       for {
-               x := v_0
-               v.reset(OpAMD64BSFL)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1 << 8)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ctz8NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz8NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSFL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSFL)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCvt16toMask16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt16toMask16x16 <t> x)
-       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVWk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt16toMask32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt16toMask32x16 <t> x)
-       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVWk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt16toMask8x16 <t> x)
-       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt32toMask16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt32toMask16x32 <t> x)
-       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVDk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt32toMask8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt32toMask8x32 <t> x)
-       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVDk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt64toMask8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt64toMask8x64 <t> x)
-       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask16x8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask16x8 <t> x)
-       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask32x4 <t> x)
-       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask32x8 <t> x)
-       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask64x2 <t> x)
-       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask64x4 <t> x)
-       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask64x8 <t> x)
-       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask16x16to16 <t> x)
-       // result: (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVWi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask16x32to32 <t> x)
-       // result: (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVDi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask16x8to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask32x16to16 <t> x)
-       // result: (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVWi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask32x4to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask32x4to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask32x8to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask32x8to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask64x2to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask64x2to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask64x4to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask64x4to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask64x8to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask8x16to16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask8x16to16 <t> x)
-       // result: (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVWi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask8x32to32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask8x32to32 <t> x)
-       // result: (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVDi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask8x64to64 <t> x)
-       // result: (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVQi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div16 [a] x y)
-       // result: (Select0 (DIVW [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv16u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div16u x y)
-       // result: (Select0 (DIVWU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div32 [a] x y)
-       // result: (Select0 (DIVL [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv32u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div32u x y)
-       // result: (Select0 (DIVLU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div64 [a] x y)
-       // result: (Select0 (DIVQ [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv64u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div64u x y)
-       // result: (Select0 (DIVQU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div8 x y)
-       // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv8u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div8u x y)
-       // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat32x16 x y mask)
-       // result: (VDIVPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat32x4 x y mask)
-       // result: (VDIVPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat32x8 x y mask)
-       // result: (VDIVPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat64x2 x y mask)
-       // result: (VDIVPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat64x4 x y mask)
-       // result: (VDIVPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat64x8 x y mask)
-       // result: (VDIVPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsMaskedInt16x16 x y mask)
-       // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDWDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsMaskedInt16x32 x y mask)
-       // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDWDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsMaskedInt16x8 x y mask)
-       // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDWDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsSaturatedMaskedUint8x16 x y mask)
-       // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDUBSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsSaturatedMaskedUint8x32 x y mask)
-       // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDUBSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsSaturatedMaskedUint8x64 x y mask)
-       // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDUBSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq16 x y)
-       // result: (SETEQ (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq32 x y)
-       // result: (SETEQ (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq32F x y)
-       // result: (SETEQF (UCOMISS x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq64 x y)
-       // result: (SETEQ (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq64F x y)
-       // result: (SETEQF (UCOMISD x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq8 x y)
-       // result: (SETEQ (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqB(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (EqB x y)
-       // result: (SETEQ (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqPtr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (EqPtr x y)
-       // result: (SETEQ (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat32x4 x y)
-       // result: (VCMPPS128 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat32x8 x y)
-       // result: (VCMPPS256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat64x2 x y)
-       // result: (VCMPPD128 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat64x4 x y)
-       // result: (VCMPPD256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat32x16 x mask)
-       // result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat32x4 x mask)
-       // result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat32x8 x mask)
-       // result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat64x2 x mask)
-       // result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat64x4 x mask)
-       // result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat64x8 x mask)
-       // result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt16x16 x mask)
-       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt16x32 x mask)
-       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt16x8 x mask)
-       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt32x16 x mask)
-       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt32x4 x mask)
-       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt32x8 x mask)
-       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt64x2 x mask)
-       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt64x4 x mask)
-       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt64x8 x mask)
-       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt8x16 x mask)
-       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt8x32 x mask)
-       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt8x64 x mask)
-       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint16x16 x mask)
-       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint16x32 x mask)
-       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint16x8 x mask)
-       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint32x16 x mask)
-       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint32x4 x mask)
-       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint32x8 x mask)
-       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint64x2 x mask)
-       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint64x4 x mask)
-       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint64x8 x mask)
-       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint8x16 x mask)
-       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint8x32 x mask)
-       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint8x64 x mask)
-       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFMA(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (FMA x y z)
-       // result: (VFMADD231SD z x y)
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               v.reset(OpAMD64VFMADD231SD)
-               v.AddArg3(z, x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloor(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Floor x)
-       // result: (ROUNDSD [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = int8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat32x4 x)
-       // result: (VROUNDPS128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat32x8 x)
-       // result: (VROUNDPS256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat64x2 x)
-       // result: (VROUNDPD128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat64x4 x)
-       // result: (VROUNDPD256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat32x16 [a] x)
-       // result: (VRNDSCALEPS512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat32x4 [a] x)
-       // result: (VRNDSCALEPS128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat32x8 [a] x)
-       // result: (VRNDSCALEPS256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat64x2 [a] x)
-       // result: (VRNDSCALEPD128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat64x4 [a] x)
-       // result: (VRNDSCALEPD256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat64x8 [a] x)
-       // result: (VRNDSCALEPD512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat32x16 [a] x)
-       // result: (VREDUCEPS512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat32x4 [a] x)
-       // result: (VREDUCEPS128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat32x8 [a] x)
-       // result: (VREDUCEPS256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat64x2 [a] x)
-       // result: (VREDUCEPD128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat64x4 [a] x)
-       // result: (VREDUCEPD256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat64x8 [a] x)
-       // result: (VREDUCEPD512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask)
-       // result: (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask)
-       // result: (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask)
-       // result: (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask)
-       // result: (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEQBMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask)
-       // result: (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEQBMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask)
-       // result: (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEQBMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldMulMaskedUint8x16 x y mask)
-       // result: (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8MULBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldMulMaskedUint8x32 x y mask)
-       // result: (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8MULBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldMulMaskedUint8x64 x y mask)
-       // result: (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8MULBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetG(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetG mem)
-       // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal
-       // result: (LoweredGetG mem)
-       for {
-               mem := v_0
-               if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) {
-                       break
-               }
-               v.reset(OpAMD64LoweredGetG)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat32x16 x)
-       // result: (VEXTRACTF64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat32x8 x)
-       // result: (VEXTRACTF128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat64x4 x)
-       // result: (VEXTRACTF128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat64x8 x)
-       // result: (VEXTRACTF64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt16x16 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt16x32 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt32x16 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt32x8 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt64x4 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt64x8 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt8x32 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt8x64 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint16x16 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint16x32 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint32x16 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint32x8 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint64x4 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint64x8 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint8x32 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint8x64 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat32x16 x)
-       // result: (VEXTRACTF64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat32x8 x)
-       // result: (VEXTRACTF128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat64x4 x)
-       // result: (VEXTRACTF128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat64x8 x)
-       // result: (VEXTRACTF64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt16x16 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt16x32 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt32x16 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt32x8 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt64x4 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt64x8 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt8x32 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt8x64 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint16x16 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint16x32 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint32x16 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint32x8 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint64x4 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint64x8 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint8x32 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint8x64 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat32x4 x y)
-       // result: (VCMPPS128 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat32x8 x y)
-       // result: (VCMPPS256 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat64x2 x y)
-       // result: (VCMPPD128 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat64x4 x y)
-       // result: (VCMPPD256 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat32x4 x y)
-       // result: (VCMPPS128 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat32x8 x y)
-       // result: (VCMPPS256 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat64x2 x y)
-       // result: (VCMPPD128 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat64x4 x y)
-       // result: (VCMPPD256 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool {
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (HasCPUFeature {s})
-       // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s})))
-       for {
-               s := auxToSym(v.Aux)
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64)
-               v1.Aux = symToAux(s)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (IsInBounds idx len)
-       // result: (SETB (CMPQ idx len))
-       for {
-               idx := v_0
-               len := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(idx, len)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat32x4 x y)
-       // result: (VCMPPS128 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat32x8 x y)
-       // result: (VCMPPS256 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat64x2 x y)
-       // result: (VCMPPD128 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat64x4 x y)
-       // result: (VCMPPD256 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNonNil(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (IsNonNil p)
-       // result: (SETNE (TESTQ p p))
-       for {
-               p := v_0
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags)
-               v0.AddArg2(p, p)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (IsSliceInBounds idx len)
-       // result: (SETBE (CMPQ idx len))
-       for {
-               idx := v_0
-               len := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(idx, len)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq16 x y)
-       // result: (SETLE (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq16U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq16U x y)
-       // result: (SETBE (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq32 x y)
-       // result: (SETLE (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq32F x y)
-       // result: (SETGEF (UCOMISS y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq32U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq32U x y)
-       // result: (SETBE (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq64 x y)
-       // result: (SETLE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq64F x y)
-       // result: (SETGEF (UCOMISD y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq64U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq64U x y)
-       // result: (SETBE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq8 x y)
-       // result: (SETLE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq8U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq8U x y)
-       // result: (SETBE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less16 x y)
-       // result: (SETL (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess16U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less16U x y)
-       // result: (SETB (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less32 x y)
-       // result: (SETL (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less32F x y)
-       // result: (SETGF (UCOMISS y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess32U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less32U x y)
-       // result: (SETB (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less64 x y)
-       // result: (SETL (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less64F x y)
-       // result: (SETGF (UCOMISD y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess64U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less64U x y)
-       // result: (SETB (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less8 x y)
-       // result: (SETL (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess8U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less8U x y)
-       // result: (SETB (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat32x4 x y)
-       // result: (VCMPPS128 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat32x8 x y)
-       // result: (VCMPPS256 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat64x2 x y)
-       // result: (VCMPPD128 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat64x4 x y)
-       // result: (VCMPPD256 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat32x4 x y)
-       // result: (VCMPPS128 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat32x8 x y)
-       // result: (VCMPPS256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat64x2 x y)
-       // result: (VCMPPD128 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat64x4 x y)
-       // result: (VCMPPD256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoad(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (Load <t> ptr mem)
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (MOVQload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is32BitInt(t)
-       // result: (MOVLload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is16BitInt(t)
-       // result: (MOVWload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: (t.IsBoolean() || is8BitInt(t))
-       // result: (MOVBload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.IsBoolean() || is8BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is32BitFloat(t)
-       // result: (MOVSSload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is32BitFloat(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is64BitFloat(t)
-       // result: (MOVSDload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is64BitFloat(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: t.Size() == 16
-       // result: (VMOVDQUload128 ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VMOVDQUload128)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: t.Size() == 32
-       // result: (VMOVDQUload256 ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VMOVDQUload256)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: t.Size() == 64
-       // result: (VMOVDQUload512 ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VMOVDQUload512)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask16x16 <t> ptr mem)
-       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask16x32 <t> ptr mem)
-       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask16x8 <t> ptr mem)
-       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask32x16 <t> ptr mem)
-       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask32x4 <t> ptr mem)
-       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask32x8 <t> ptr mem)
-       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask64x2 <t> ptr mem)
-       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask64x4 <t> ptr mem)
-       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask64x8 <t> ptr mem)
-       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask8x16 <t> ptr mem)
-       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask8x32 <t> ptr mem)
-       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask8x64 <t> ptr mem)
-       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked16 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK16load512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK16load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMasked32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked32 <t> ptr mask mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK32load128 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32load128)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked32 <t> ptr mask mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK32load256 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32load256)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked32 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK32load512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMasked64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked64 <t> ptr mask mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK64load128 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64load128)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked64 <t> ptr mask mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK64load256 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64load256)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked64 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMasked8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked8 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK8load512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK8load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LocalAddr <t> {sym} base mem)
-       // cond: t.Elem().HasPointers()
-       // result: (LEAQ {sym} (SPanchored base mem))
-       for {
-               t := v.Type
-               sym := auxToSym(v.Aux)
-               base := v_0
-               mem := v_1
-               if !(t.Elem().HasPointers()) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ)
-               v.Aux = symToAux(sym)
-               v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
-               v0.AddArg2(base, mem)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (LocalAddr <t> {sym} base _)
-       // cond: !t.Elem().HasPointers()
-       // result: (LEAQ {sym} base)
-       for {
-               t := v.Type
-               sym := auxToSym(v.Aux)
-               base := v_0
-               if !(!t.Elem().HasPointers()) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ)
-               v.Aux = symToAux(sym)
-               v.AddArg(base)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpMax32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Max32F <t> x y)
-       // result: (Neg32F <t> (Min32F <t> (Neg32F <t> x) (Neg32F <t> y)))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpNeg32F)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpMin32F, t)
-               v1 := b.NewValue0(v.Pos, OpNeg32F, t)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpNeg32F, t)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMax64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Max64F <t> x y)
-       // result: (Neg64F <t> (Min64F <t> (Neg64F <t> x) (Neg64F <t> y)))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpNeg64F)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpMin64F, t)
-               v1 := b.NewValue0(v.Pos, OpNeg64F, t)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpNeg64F, t)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat32x16 x y mask)
-       // result: (VMAXPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat32x4 x y mask)
-       // result: (VMAXPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat32x8 x y mask)
-       // result: (VMAXPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat64x2 x y mask)
-       // result: (VMAXPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat64x4 x y mask)
-       // result: (VMAXPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat64x8 x y mask)
-       // result: (VMAXPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt16x16 x y mask)
-       // result: (VPMAXSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt16x32 x y mask)
-       // result: (VPMAXSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt16x8 x y mask)
-       // result: (VPMAXSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt32x16 x y mask)
-       // result: (VPMAXSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt32x4 x y mask)
-       // result: (VPMAXSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt32x8 x y mask)
-       // result: (VPMAXSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt64x2 x y mask)
-       // result: (VPMAXSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt64x4 x y mask)
-       // result: (VPMAXSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt64x8 x y mask)
-       // result: (VPMAXSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt8x16 x y mask)
-       // result: (VPMAXSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt8x32 x y mask)
-       // result: (VPMAXSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt8x64 x y mask)
-       // result: (VPMAXSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint16x16 x y mask)
-       // result: (VPMAXUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint16x32 x y mask)
-       // result: (VPMAXUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint16x8 x y mask)
-       // result: (VPMAXUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint32x16 x y mask)
-       // result: (VPMAXUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint32x4 x y mask)
-       // result: (VPMAXUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint32x8 x y mask)
-       // result: (VPMAXUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint64x2 x y mask)
-       // result: (VPMAXUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint64x4 x y mask)
-       // result: (VPMAXUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint64x8 x y mask)
-       // result: (VPMAXUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint8x16 x y mask)
-       // result: (VPMAXUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint8x32 x y mask)
-       // result: (VPMAXUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint8x64 x y mask)
-       // result: (VPMAXUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMin32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Min32F <t> x y)
-       // result: (POR (MINSS <t> (MINSS <t> x y) x) (MINSS <t> x y))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64POR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
-               v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
-               v1.AddArg2(x, y)
-               v0.AddArg2(v1, x)
-               v.AddArg2(v0, v1)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMin64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Min64F <t> x y)
-       // result: (POR (MINSD <t> (MINSD <t> x y) x) (MINSD <t> x y))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64POR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
-               v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
-               v1.AddArg2(x, y)
-               v0.AddArg2(v1, x)
-               v.AddArg2(v0, v1)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat32x16 x y mask)
-       // result: (VMINPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat32x4 x y mask)
-       // result: (VMINPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat32x8 x y mask)
-       // result: (VMINPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat64x2 x y mask)
-       // result: (VMINPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat64x4 x y mask)
-       // result: (VMINPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat64x8 x y mask)
-       // result: (VMINPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt16x16 x y mask)
-       // result: (VPMINSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt16x32 x y mask)
-       // result: (VPMINSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt16x8 x y mask)
-       // result: (VPMINSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt32x16 x y mask)
-       // result: (VPMINSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt32x4 x y mask)
-       // result: (VPMINSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt32x8 x y mask)
-       // result: (VPMINSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt64x2 x y mask)
-       // result: (VPMINSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt64x4 x y mask)
-       // result: (VPMINSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt64x8 x y mask)
-       // result: (VPMINSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt8x16 x y mask)
-       // result: (VPMINSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt8x32 x y mask)
-       // result: (VPMINSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt8x64 x y mask)
-       // result: (VPMINSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint16x16 x y mask)
-       // result: (VPMINUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint16x32 x y mask)
-       // result: (VPMINUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint16x8 x y mask)
-       // result: (VPMINUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint32x16 x y mask)
-       // result: (VPMINUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint32x4 x y mask)
-       // result: (VPMINUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint32x8 x y mask)
-       // result: (VPMINUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint64x2 x y mask)
-       // result: (VPMINUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint64x4 x y mask)
-       // result: (VPMINUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint64x8 x y mask)
-       // result: (VPMINUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint8x16 x y mask)
-       // result: (VPMINUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint8x32 x y mask)
-       // result: (VPMINUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint8x64 x y mask)
-       // result: (VPMINUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod16 [a] x y)
-       // result: (Select1 (DIVW [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod16u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod16u x y)
-       // result: (Select1 (DIVWU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod32 [a] x y)
-       // result: (Select1 (DIVL [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod32u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod32u x y)
-       // result: (Select1 (DIVLU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod64 [a] x y)
-       // result: (Select1 (DIVQ [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod64u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod64u x y)
-       // result: (Select1 (DIVQU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod8 x y)
-       // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod8u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod8u x y)
-       // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMove(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Move [0] _ _ mem)
-       // result: mem
-       for {
-               if auxIntToInt64(v.AuxInt) != 0 {
-                       break
-               }
-               mem := v_2
-               v.copyOf(mem)
-               return true
-       }
-       // match: (Move [1] dst src mem)
-       // result: (MOVBstore dst (MOVBload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 1 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [2] dst src mem)
-       // result: (MOVWstore dst (MOVWload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 2 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [4] dst src mem)
-       // result: (MOVLstore dst (MOVLload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 4 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [8] dst src mem)
-       // result: (MOVQstore dst (MOVQload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 8 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [16] dst src mem)
-       // result: (MOVOstore dst (MOVOload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 16 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVOstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [3] dst src mem)
-       // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 3 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = int32ToAuxInt(2)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AuxInt = int32ToAuxInt(2)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [5] dst src mem)
-       // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 5 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = int32ToAuxInt(4)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AuxInt = int32ToAuxInt(4)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [6] dst src mem)
-       // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 6 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(4)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AuxInt = int32ToAuxInt(4)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [7] dst src mem)
-       // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 7 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(3)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(3)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [9] dst src mem)
-       // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 9 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AuxInt = int32ToAuxInt(8)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [10] dst src mem)
-       // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 10 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AuxInt = int32ToAuxInt(8)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [11] dst src mem)
-       // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 11 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(7)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(7)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [12] dst src mem)
-       // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 12 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(8)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s >= 13 && s <= 15
-       // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s >= 13 && s <= 15) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(int32(s - 8))
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v0.AuxInt = int32ToAuxInt(int32(s - 8))
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s > 16 && s < 192 && logLargeCopy(v, s)
-       // result: (LoweredMove [s] dst src mem)
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s > 16 && s < 192 && logLargeCopy(v, s)) {
-                       break
-               }
-               v.reset(OpAMD64LoweredMove)
-               v.AuxInt = int64ToAuxInt(s)
-               v.AddArg3(dst, src, mem)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)
-       // result: (LoweredMoveLoop [s] dst src mem)
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)) {
-                       break
-               }
-               v.reset(OpAMD64LoweredMoveLoop)
-               v.AuxInt = int64ToAuxInt(s)
-               v.AddArg3(dst, src, mem)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s > repMoveThreshold && s%8 != 0
-       // result: (Move [s-s%8] (OffPtr <dst.Type> dst [s%8]) (OffPtr <src.Type> src [s%8]) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s > repMoveThreshold && s%8 != 0) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(s - s%8)
-               v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
-               v0.AuxInt = int64ToAuxInt(s % 8)
-               v0.AddArg(dst)
-               v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
-               v1.AuxInt = int64ToAuxInt(s % 8)
-               v1.AddArg(src)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v3.AddArg2(src, mem)
-               v2.AddArg3(dst, v3, mem)
-               v.AddArg3(v0, v1, v2)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)
-       // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem)
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)) {
-                       break
-               }
-               v.reset(OpAMD64REPMOVSQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(s / 8)
-               v.AddArg4(dst, src, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat32x16 x y z mask)
-       // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat32x4 x y z mask)
-       // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat32x8 x y z mask)
-       // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat64x2 x y z mask)
-       // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat64x4 x y z mask)
-       // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat64x8 x y z mask)
-       // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat32x16 x y z mask)
-       // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat32x4 x y z mask)
-       // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat32x8 x y z mask)
-       // result: (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat64x2 x y z mask)
-       // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat64x4 x y z mask)
-       // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat64x8 x y z mask)
-       // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedInt16x16 x y mask)
-       // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedInt16x32 x y mask)
-       // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedInt16x8 x y mask)
-       // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedUint16x16 x y mask)
-       // result: (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHUWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedUint16x32 x y mask)
-       // result: (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHUWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedUint16x8 x y mask)
-       // result: (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHUWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat32x16 x y mask)
-       // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat32x4 x y mask)
-       // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat32x8 x y mask)
-       // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat64x2 x y mask)
-       // result: (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat64x4 x y mask)
-       // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat64x8 x y mask)
-       // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt16x16 x y mask)
-       // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt16x32 x y mask)
-       // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt16x8 x y mask)
-       // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt32x16 x y mask)
-       // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt32x4 x y mask)
-       // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt32x8 x y mask)
-       // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt64x2 x y mask)
-       // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt64x4 x y mask)
-       // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt64x8 x y mask)
-       // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint16x16 x y mask)
-       // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint16x32 x y mask)
-       // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint16x8 x y mask)
-       // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint32x16 x y mask)
-       // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint32x4 x y mask)
-       // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint32x8 x y mask)
-       // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint64x2 x y mask)
-       // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint64x4 x y mask)
-       // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint64x8 x y mask)
-       // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat32x16 x y z mask)
-       // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat32x4 x y z mask)
-       // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat32x8 x y z mask)
-       // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat64x2 x y z mask)
-       // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat64x4 x y z mask)
-       // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat64x8 x y z mask)
-       // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeg32F(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Neg32F x)
-       // result: (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
-       for {
-               x := v_0
-               v.reset(OpAMD64PXOR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32)
-               v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1)))
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeg64F(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Neg64F x)
-       // result: (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
-       for {
-               x := v_0
-               v.reset(OpAMD64PXOR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64)
-               v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1))
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq16 x y)
-       // result: (SETNE (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq32 x y)
-       // result: (SETNE (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq32F x y)
-       // result: (SETNEF (UCOMISS x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq64 x y)
-       // result: (SETNE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq64F x y)
-       // result: (SETNEF (UCOMISD x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq8 x y)
-       // result: (SETNE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeqB(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (NeqB x y)
-       // result: (SETNE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeqPtr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (NeqPtr x y)
-       // result: (SETNE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNot(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Not x)
-       // result: (XORLconst [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = int32ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat32x4 x y)
-       // result: (VCMPPS128 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat32x8 x y)
-       // result: (VCMPPS256 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat64x2 x y)
-       // result: (VCMPPD128 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat64x4 x y)
-       // result: (VCMPPD256 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
+               v.reset(OpAMD64CMOVLCS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCC y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLCC)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLS y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNEF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOffPtr(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (OffPtr [off] ptr)
-       // cond: is32Bit(off)
-       // result: (ADDQconst [int32(off)] ptr)
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGTF y x cond)
        for {
-               off := auxIntToInt64(v.AuxInt)
-               ptr := v_0
-               if !(is32Bit(off)) {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGF {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = int32ToAuxInt(int32(off))
-               v.AddArg(ptr)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGTF)
+               v.AddArg3(y, x, cond)
                return true
        }
-       // match: (OffPtr [off] ptr)
-       // result: (ADDQ (MOVQconst [off]) ptr)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGEF y x cond)
        for {
-               off := auxIntToInt64(v.AuxInt)
-               ptr := v_0
-               v.reset(OpAMD64ADDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(off)
-               v.AddArg2(v0, ptr)
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt16x16 x mask)
-       // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQ y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQ)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt16x32 x mask)
-       // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNE y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt16x8 x mask)
-       // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLT y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt32x16 x mask)
-       // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGT y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETG {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt32x4 x mask)
-       // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLE y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETLE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt32x8 x mask)
-       // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGE y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt64x2 x mask)
-       // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWHI y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWHI)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt64x4 x mask)
-       // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCS y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt64x8 x mask)
-       // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCC y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCC)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt8x16 x mask)
-       // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLS y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt8x32 x mask)
-       // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt8x64 x mask)
-       // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNEF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint16x16 x mask)
-       // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGTF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGTF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint16x32 x mask)
-       // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGEF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint16x8 x mask)
-       // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 1
+       // result: (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg3(x, y, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint32x16 x mask)
-       // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 2
+       // result: (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg3(x, y, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint32x4 x mask)
-       // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 4
+       // result: (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 4) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg3(x, y, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint32x8 x mask)
-       // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v0.AddArg(check)
+               v.AddArg3(y, x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint64x2 x mask)
-       // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
+       // result: (CMOVLNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v0.AddArg(check)
+               v.AddArg3(y, x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint64x4 x mask)
-       // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
+       // result: (CMOVWNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v0.AddArg(check)
+               v.AddArg3(y, x, v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint64x8 x mask)
-       // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConst16(v *Value) bool {
+       // match: (Const16 [c])
+       // result: (MOVLconst [int32(c)])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               c := auxIntToInt16(v.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(int32(c))
                return true
        }
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint8x16 x mask)
-       // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConst8(v *Value) bool {
+       // match: (Const8 [c])
+       // result: (MOVLconst [int32(c)])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               c := auxIntToInt8(v.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(int32(c))
                return true
        }
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint8x32 x mask)
-       // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConstBool(v *Value) bool {
+       // match: (ConstBool [c])
+       // result: (MOVLconst [b2i32(c)])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               c := auxIntToBool(v.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(b2i32(c))
                return true
        }
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint8x64 x mask)
-       // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConstNil(v *Value) bool {
+       // match: (ConstNil )
+       // result: (MOVQconst [0])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64ToAuxInt(0)
                return true
        }
 }
-func rewriteValueAMD64_OpOrMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedInt32x16 x y mask)
-       // result: (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz16 x)
+       // result: (BSFL (ORLconst <typ.UInt32> [1<<16] x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64BSFL)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1 << 16)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpOrMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt32x4 x y mask)
-       // result: (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Ctz16NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt32x8 x y mask)
-       // result: (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Ctz16NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSFL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSFL)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedInt64x2 x y mask)
-       // result: (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz32 x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt64x4 x y mask)
-       // result: (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Ctz32 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64)
+               v1.AuxInt = int8ToAuxInt(32)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt64x8 x y mask)
-       // result: (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Ctz32NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedUint32x16 x y mask)
-       // result: (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Ctz32NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSFL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSFL)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedUint32x4 x y mask)
-       // result: (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz64 x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTQ x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTQ)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedUint32x8 x y mask)
-       // result: (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Ctz64 <t> x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQ)
+               v0 := b.NewValue0(v.Pos, OpSelect0, t)
+               v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v3.AddArg(v1)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedUint64x2 x y mask)
-       // result: (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz64NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTQ x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTQ)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedUint64x4 x y mask)
-       // result: (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Ctz64NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (Select0 (BSFQ x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedUint64x8 x y mask)
-       // result: (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz8 x)
+       // result: (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64BSFL)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1 << 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Permute2MaskedFloat32x16 x y z mask)
-       // result: (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Ctz8NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpPermute2MaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Permute2MaskedFloat32x4 x y z mask)
-       // result: (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Ctz8NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSFL x)
        for {
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSFL)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt16toMask16x16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat32x8 x y z mask)
-       // result: (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Cvt16toMask16x16 <t> x)
+       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVWk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec16x16)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt16toMask32x16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat64x2 x y z mask)
-       // result: (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Cvt16toMask32x16 <t> x)
+       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVWk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat64x4 x y z mask)
-       // result: (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Cvt16toMask8x16 <t> x)
+       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec8x16)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt32toMask16x32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat64x8 x y z mask)
-       // result: (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Cvt32toMask16x32 <t> x)
+       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVDk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt32toMask8x32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt16x16 x y z mask)
-       // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Cvt32toMask8x32 <t> x)
+       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVDk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec8x32)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt64toMask8x64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt16x32 x y z mask)
-       // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Cvt64toMask8x64 <t> x)
+       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask16x8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt16x8 x y z mask)
-       // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask16x8 <t> x)
+       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec16x8)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask32x4(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt32x16 x y z mask)
-       // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask32x4 <t> x)
+       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec32x4)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask32x8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt32x4 x y z mask)
-       // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask32x8 <t> x)
+       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec32x8)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask64x2(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt32x8 x y z mask)
-       // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask64x2 <t> x)
+       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec64x2)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask64x4(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt64x2 x y z mask)
-       // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask64x4 <t> x)
+       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec64x4)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt64x4 x y z mask)
-       // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask64x8 <t> x)
+       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt64x8 x y z mask)
-       // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CvtMask16x16to16 <t> x)
+       // result: (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVWi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt8x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt8x16 x y z mask)
-       // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CvtMask16x32to32 <t> x)
+       // result: (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVDi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt8x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt8x32 x y z mask)
-       // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CvtMask16x8to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt8x64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt8x64 x y z mask)
-       // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CvtMask32x16to16 <t> x)
+       // result: (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVWi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask32x4to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint16x16 x y z mask)
-       // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CvtMask32x4to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask32x8to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint16x32 x y z mask)
-       // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CvtMask32x8to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask64x2to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint16x8 x y z mask)
-       // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CvtMask64x2to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask64x4to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint32x16 x y z mask)
-       // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CvtMask64x4to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint32x4 x y z mask)
-       // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CvtMask64x8to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask8x16to16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint32x8 x y z mask)
-       // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CvtMask8x16to16 <t> x)
+       // result: (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVWi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask8x32to32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint64x2 x y z mask)
-       // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CvtMask8x32to32 <t> x)
+       // result: (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVDi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint64x4 x y z mask)
-       // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CvtMask8x64to64 <t> x)
+       // result: (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVQi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint64x8 x y z mask)
-       // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div16 [a] x y)
+       // result: (Select0 (DIVW [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint8x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv16u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint8x16 x y z mask)
-       // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div16u x y)
+       // result: (Select0 (DIVWU x y))
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint8x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint8x32 x y z mask)
-       // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div32 [a] x y)
+       // result: (Select0 (DIVL [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint8x64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv32u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint8x64 x y z mask)
-       // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div32u x y)
+       // result: (Select0 (DIVLU x y))
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat32x16 x y mask)
-       // result: (VPERMPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div64 [a] x y)
+       // result: (Select0 (DIVQ [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv64u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat32x8 x y mask)
-       // result: (VPERMPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div64u x y)
+       // result: (Select0 (DIVQU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat64x4 x y mask)
-       // result: (VPERMPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div8 x y)
+       // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv8u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat64x8 x y mask)
-       // result: (VPERMPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div8u x y)
+       // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt16x16 x y mask)
-       // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Eq16 x y)
+       // result: (SETEQ (CMPW x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt16x32 x y mask)
-       // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Eq32 x y)
+       // result: (SETEQ (CMPL x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt16x8 x y mask)
-       // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Eq32F x y)
+       // result: (SETEQF (UCOMISS x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt32x16 x y mask)
-       // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Eq64 x y)
+       // result: (SETEQ (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt32x8 x y mask)
-       // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Eq64F x y)
+       // result: (SETEQF (UCOMISD x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt64x4 x y mask)
-       // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Eq8 x y)
+       // result: (SETEQ (CMPB x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqB(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt64x8 x y mask)
-       // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (EqB x y)
+       // result: (SETEQ (CMPB x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqPtr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt8x16 x y mask)
-       // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (EqPtr x y)
+       // result: (SETEQ (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt8x32 x y mask)
-       // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(0)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedInt8x64 x y mask)
-       // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (EqualFloat32x4 x y)
+       // result: (VCMPPS128 [0] x y)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               y := v_1
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedUint16x16 x y mask)
-       // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (EqualFloat32x8 x y)
+       // result: (VCMPPS256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedUint16x32 x y mask)
-       // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (EqualFloat64x2 x y)
+       // result: (VCMPPD128 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedUint16x8 x y mask)
-       // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (EqualFloat64x4 x y)
+       // result: (VCMPPD256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint32x16 x y mask)
-       // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(0)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint32x8 x y mask)
-       // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint64x4 x y mask)
-       // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint64x8 x y mask)
-       // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint8x16 x y mask)
-       // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint8x32 x y mask)
-       // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint8x64 x y mask)
-       // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPopCount16(v *Value) bool {
+func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (PopCount16 x)
-       // result: (POPCNTL (MOVWQZX <typ.UInt32> x))
+       // match: (EqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
        for {
                x := v_0
-               v.reset(OpAMD64POPCNTL)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
-               v0.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPopCount8(v *Value) bool {
+func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (PopCount8 x)
-       // result: (POPCNTL (MOVBQZX <typ.UInt32> x))
+       // match: (EqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
        for {
                x := v_0
-               v.reset(OpAMD64POPCNTL)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
-               v0.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat32x16 x mask)
-       // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandFloat32x16 x mask)
+       // result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PSMasked512)
+               v.reset(OpAMD64VEXPANDPSMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat32x4 x mask)
-       // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandFloat32x4 x mask)
+       // result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PSMasked128)
+               v.reset(OpAMD64VEXPANDPSMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat32x8 x mask)
-       // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandFloat32x8 x mask)
+       // result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PSMasked256)
+               v.reset(OpAMD64VEXPANDPSMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat64x2 x mask)
-       // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandFloat64x2 x mask)
+       // result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PDMasked128)
+               v.reset(OpAMD64VEXPANDPDMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat64x4 x mask)
-       // result: (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandFloat64x4 x mask)
+       // result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PDMasked256)
+               v.reset(OpAMD64VEXPANDPDMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat64x8 x mask)
-       // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandFloat64x8 x mask)
+       // result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PDMasked512)
+               v.reset(OpAMD64VEXPANDPDMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat32x16 x mask)
-       // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandInt16x16 x mask)
+       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat32x4 x mask)
-       // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt16x32 x mask)
+       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat32x8 x mask)
-       // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt16x8 x mask)
+       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat64x2 x mask)
-       // result: (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandInt32x16 x mask)
+       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat64x4 x mask)
-       // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt32x4 x mask)
+       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat64x8 x mask)
-       // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt32x8 x mask)
+       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt32x16 [a] x mask)
-       // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandInt64x2 x mask)
+       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt32x4 [a] x mask)
-       // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt64x4 x mask)
+       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt32x8 [a] x mask)
-       // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt64x8 x mask)
+       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt64x2 [a] x mask)
-       // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandInt8x16 x mask)
+       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt64x4 [a] x mask)
-       // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt8x32 x mask)
+       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt64x8 [a] x mask)
-       // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt8x64 x mask)
+       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint32x16 [a] x mask)
-       // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandUint16x16 x mask)
+       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint32x4 [a] x mask)
-       // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint16x32 x mask)
+       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint32x8 [a] x mask)
-       // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint16x8 x mask)
+       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint64x2 [a] x mask)
-       // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandUint32x16 x mask)
+       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint64x4 [a] x mask)
-       // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint32x4 x mask)
+       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint64x8 [a] x mask)
-       // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint32x8 x mask)
+       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt32x16 [a] x mask)
-       // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandUint64x2 x mask)
+       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt32x4 [a] x mask)
-       // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint64x4 x mask)
+       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt32x8 [a] x mask)
-       // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint64x8 x mask)
+       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt64x2 [a] x mask)
-       // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandUint8x16 x mask)
+       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt64x4 [a] x mask)
-       // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint8x32 x mask)
+       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt64x8 [a] x mask)
-       // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint8x64 x mask)
+       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpFMA(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint32x16 [a] x mask)
-       // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (FMA x y z)
+       // result: (VFMADD231SD z x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               z := v_2
+               v.reset(OpAMD64VFMADD231SD)
+               v.AddArg3(z, x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloor(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint32x4 [a] x mask)
-       // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Floor x)
+       // result: (ROUNDSD [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint32x8 [a] x mask)
-       // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (FloorFloat32x4 x)
+       // result: (VROUNDPS128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint64x2 [a] x mask)
-       // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (FloorFloat32x8 x)
+       // result: (VROUNDPS256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint64x4 [a] x mask)
-       // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (FloorFloat64x2 x)
+       // result: (VROUNDPD128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint64x8 [a] x mask)
-       // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (FloorFloat64x4 x)
+       // result: (VROUNDPD256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt32x16 x y mask)
-       // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt32x4 x y mask)
-       // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt32x8 x y mask)
-       // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt64x2 x y mask)
-       // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt64x4 x y mask)
-       // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt64x8 x y mask)
-       // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint32x16 x y mask)
-       // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint32x4 x y mask)
-       // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint32x8 x y mask)
-       // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint64x2 x y mask)
-       // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint64x4 x y mask)
-       // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint64x8 x y mask)
-       // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetG(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt32x16 x y mask)
-       // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetG mem)
+       // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal
+       // result: (LoweredGetG mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               mem := v_0
+               if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) {
+                       break
+               }
+               v.reset(OpAMD64LoweredGetG)
+               v.AddArg(mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt32x4 x y mask)
-       // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetHiFloat32x16 x)
+       // result: (VEXTRACTF64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt32x8 x y mask)
-       // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetHiFloat32x8 x)
+       // result: (VEXTRACTF128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt64x2 x y mask)
-       // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetHiFloat64x4 x)
+       // result: (VEXTRACTF128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt64x4 x y mask)
-       // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetHiFloat64x8 x)
+       // result: (VEXTRACTF64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt64x8 x y mask)
-       // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetHiInt16x16 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint32x16 x y mask)
-       // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetHiInt16x32 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint32x4 x y mask)
-       // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetHiInt32x16 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint32x8 x y mask)
-       // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetHiInt32x8 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint64x2 x y mask)
-       // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetHiInt64x4 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint64x4 x y mask)
-       // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetHiInt64x8 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint64x8 x y mask)
-       // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetHiInt8x32 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
+func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEven x)
-       // result: (ROUNDSD [0] x)
+       // match: (GetHiInt8x64 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = int8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat32x4 x)
-       // result: (VROUNDPS128 [0] x)
+       // match: (GetHiUint16x16 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPS128)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat32x8 x)
-       // result: (VROUNDPS256 [0] x)
+       // match: (GetHiUint16x32 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPS256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat64x2 x)
-       // result: (VROUNDPD128 [0] x)
+       // match: (GetHiUint32x16 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPD128)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat64x4 x)
-       // result: (VROUNDPD256 [0] x)
+       // match: (GetHiUint32x8 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPD256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat32x16 [a] x)
-       // result: (VRNDSCALEPS512 [a+0] x)
+       // match: (GetHiUint64x4 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat32x4 [a] x)
-       // result: (VRNDSCALEPS128 [a+0] x)
+       // match: (GetHiUint64x8 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat32x8 [a] x)
-       // result: (VRNDSCALEPS256 [a+0] x)
+       // match: (GetHiUint8x32 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat64x2 [a] x)
-       // result: (VRNDSCALEPD128 [a+0] x)
+       // match: (GetHiUint8x64 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat64x4 [a] x)
-       // result: (VRNDSCALEPD256 [a+0] x)
+       // match: (GetLoFloat32x16 x)
+       // result: (VEXTRACTF64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat64x8 [a] x)
-       // result: (VRNDSCALEPD512 [a+0] x)
+       // match: (GetLoFloat32x8 x)
+       // result: (VEXTRACTF128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetLoFloat64x4 x)
+       // result: (VEXTRACTF128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetLoFloat64x8 x)
+       // result: (VEXTRACTF64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetLoInt16x16 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetLoInt16x32 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetLoInt32x16 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetLoInt32x8 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat32x16 [a] x)
-       // result: (VREDUCEPS512 [a+0] x)
+       // match: (GetLoInt64x4 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat32x4 [a] x)
-       // result: (VREDUCEPS128 [a+0] x)
+       // match: (GetLoInt64x8 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat32x8 [a] x)
-       // result: (VREDUCEPS256 [a+0] x)
+       // match: (GetLoInt8x32 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat64x2 [a] x)
-       // result: (VREDUCEPD128 [a+0] x)
+       // match: (GetLoInt8x64 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat64x4 [a] x)
-       // result: (VREDUCEPD256 [a+0] x)
+       // match: (GetLoUint16x16 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat64x8 [a] x)
-       // result: (VREDUCEPD512 [a+0] x)
+       // match: (GetLoUint16x32 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetLoUint32x16 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetLoUint32x8 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetLoUint64x4 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetLoUint64x8 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetLoUint8x32 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetLoUint8x64 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh16Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
+}
+func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterEqualFloat32x4 x y)
+       // result: (VCMPPS128 [13] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(13)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh16Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Rsh16Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
+       // match: (GreaterEqualFloat32x8 x y)
+       // result: (VCMPPS256 [13] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(13)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh16Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16])))
+       // match: (GreaterEqualFloat64x2 x y)
+       // result: (VCMPPD128 [13] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(13)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh16Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
+}
+func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterEqualFloat64x4 x y)
+       // result: (VCMPPD256 [13] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(13)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Rsh16Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
-               v.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Rsh32Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
-               v.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
-               v.AddArg2(x, y)
+}
+func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
+}
+func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh32Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
+       // match: (GreaterFloat32x4 x y)
+       // result: (VCMPPS128 [14] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(14)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh32Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
+}
+func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterFloat32x8 x y)
+       // result: (VCMPPS256 [14] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(14)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh32x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32])))))
+       // match: (GreaterFloat64x2 x y)
+       // result: (VCMPPD128 [14] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(14)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh32x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterFloat64x4 x y)
+       // result: (VCMPPD256 [14] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(14)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [32])))))
+       typ := &b.Func.Config.Types
+       // match: (GreaterFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [32])))))
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool {
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (HasCPUFeature {s})
+       // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s})))
        for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg2(x, y)
+               s := auxToSym(v.Aux)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64)
+               v1.Aux = symToAux(s)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool {
+func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
+       // match: (IsInBounds idx len)
+       // result: (SETB (CMPQ idx len))
        for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               idx := v_0
+               len := v_1
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(idx, len)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (IsNanFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(3)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool {
+func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh64Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
+       // match: (IsNanFloat32x4 x y)
+       // result: (VCMPPS128 [3] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(3)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh64Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (IsNanFloat32x8 x y)
+       // result: (VCMPPS256 [3] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(3)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x16(v *Value) bool {
+func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh64x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64])))))
+       // match: (IsNanFloat64x2 x y)
+       // result: (VCMPPD128 [3] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(3)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh64x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (IsNanFloat64x4 x y)
+       // result: (VCMPPD256 [3] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(3)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x32(v *Value) bool {
+func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [64])))))
+       typ := &b.Func.Config.Types
+       // match: (IsNanFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(3)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpIsNonNil(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (IsNonNil p)
+       // result: (SETNE (TESTQ p p))
        for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.AddArg2(x, y)
+               p := v_0
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags)
+               v0.AddArg2(p, p)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x64(v *Value) bool {
+func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [64])))))
+       // match: (IsSliceInBounds idx len)
+       // result: (SETBE (CMPQ idx len))
        for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               idx := v_0
+               len := v_1
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(idx, len)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpLeq16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq16 x y)
+       // result: (SETLE (CMPW x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x8(v *Value) bool {
+func rewriteValueAMD64_OpLeq16U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64])))))
+       // match: (Leq16U x y)
+       // result: (SETBE (CMPW x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpLeq32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq32 x y)
+       // result: (SETLE (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool {
+func rewriteValueAMD64_OpLeq32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8])))
+       // match: (Leq32F x y)
+       // result: (SETGEF (UCOMISS y x))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64SETGEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLeq32U(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq32U x y)
+       // result: (SETBE (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool {
+func rewriteValueAMD64_OpLeq64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8])))
+       // match: (Leq64 x y)
+       // result: (SETLE (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLeq64F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq64F x y)
+       // result: (SETGEF (UCOMISD y x))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETGEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool {
+func rewriteValueAMD64_OpLeq64U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8])))
+       // match: (Leq64U x y)
+       // result: (SETBE (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLeq8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq8 x y)
+       // result: (SETLE (CMPB x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool {
+func rewriteValueAMD64_OpLeq8U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
+       // match: (Leq8U x y)
+       // result: (SETBE (CMPB x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLess16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less16 x y)
+       // result: (SETL (CMPW x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x16(v *Value) bool {
+func rewriteValueAMD64_OpLess16U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
+       // match: (Less16U x y)
+       // result: (SETB (CMPW x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less32 x y)
+       // result: (SETL (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x32(v *Value) bool {
+func rewriteValueAMD64_OpLess32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [8])))))
+       // match: (Less32F x y)
+       // result: (SETGF (UCOMISS y x))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETGF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess32U(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less32U x y)
+       // result: (SETB (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x64(v *Value) bool {
+func rewriteValueAMD64_OpLess64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [8])))))
+       // match: (Less64 x y)
+       // result: (SETL (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess64F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less64F x y)
+       // result: (SETGF (UCOMISD y x))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETGF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x8(v *Value) bool {
+func rewriteValueAMD64_OpLess64U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
+       // match: (Less64U x y)
+       // result: (SETB (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less8 x y)
+       // result: (SETL (CMPB x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpScaleMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLess8U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ScaleMaskedFloat32x16 x y mask)
-       // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Less8U x y)
+       // result: (SETB (CMPB x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ScaleMaskedFloat32x4 x y mask)
-       // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (LessEqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat32x8 x y mask)
-       // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat32x4 x y)
+       // result: (VCMPPS128 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat64x2 x y mask)
-       // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat32x8 x y)
+       // result: (VCMPPS256 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat64x4 x y mask)
-       // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat64x2 x y)
+       // result: (VCMPPD128 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat64x8 x y mask)
-       // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat64x4 x y)
+       // result: (VCMPPD256 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSelect0(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Select0 (Mul64uover x y))
-       // result: (Select0 <typ.UInt64> (MULQU x y))
+       // match: (LessEqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y))
        for {
-               if v_0.Op != OpMul64uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
                v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select0 (Mul32uover x y))
-       // result: (Select0 <typ.UInt32> (MULLU x y))
+}
+func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y))
        for {
-               if v_0.Op != OpMul32uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt32
-               v0 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
                v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select0 (Add64carry x y c))
-       // result: (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
-       for {
-               if v_0.Op != OpAdd64carry {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v2.AddArg(c)
-               v1.AddArg(v2)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (Select0 (Sub64borrow x y c))
-       // result: (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
-       for {
-               if v_0.Op != OpSub64borrow {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v2.AddArg(c)
-               v1.AddArg(v2)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (Select0 <t> (AddTupleFirst32 val tuple))
-       // result: (ADDL val (Select0 <t> tuple))
-       for {
-               t := v.Type
-               if v_0.Op != OpAMD64AddTupleFirst32 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               val := v_0.Args[0]
-               v.reset(OpAMD64ADDL)
-               v0 := b.NewValue0(v.Pos, OpSelect0, t)
-               v0.AddArg(tuple)
-               v.AddArg2(val, v0)
-               return true
-       }
-       // match: (Select0 <t> (AddTupleFirst64 val tuple))
-       // result: (ADDQ val (Select0 <t> tuple))
-       for {
-               t := v.Type
-               if v_0.Op != OpAMD64AddTupleFirst64 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               val := v_0.Args[0]
-               v.reset(OpAMD64ADDQ)
-               v0 := b.NewValue0(v.Pos, OpSelect0, t)
-               v0.AddArg(tuple)
-               v.AddArg2(val, v0)
-               return true
-       }
-       // match: (Select0 a:(ADDQconstflags [c] x))
-       // cond: a.Uses == 1
-       // result: (ADDQconst [c] x)
-       for {
-               a := v_0
-               if a.Op != OpAMD64ADDQconstflags {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               x := a.Args[0]
-               if !(a.Uses == 1) {
-                       break
-               }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = int32ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Select0 a:(ADDLconstflags [c] x))
-       // cond: a.Uses == 1
-       // result: (ADDLconst [c] x)
-       for {
-               a := v_0
-               if a.Op != OpAMD64ADDLconstflags {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               x := a.Args[0]
-               if !(a.Uses == 1) {
-                       break
-               }
-               v.reset(OpAMD64ADDLconst)
-               v.AuxInt = int32ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       return false
 }
-func rewriteValueAMD64_OpSelect1(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Select1 (Mul64uover x y))
-       // result: (SETO (Select1 <types.TypeFlags> (MULQU x y)))
+       // match: (LessEqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y))
        for {
-               if v_0.Op != OpMul64uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETO)
-               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1.AddArg2(x, y)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (Mul32uover x y))
-       // result: (SETO (Select1 <types.TypeFlags> (MULLU x y)))
+}
+func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y))
        for {
-               if v_0.Op != OpMul32uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETO)
-               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v1.AddArg2(x, y)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (Add64carry x y c))
-       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
+}
+func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y))
        for {
-               if v_0.Op != OpAdd64carry {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64NEGQ)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v4.AddArg(c)
-               v3.AddArg(v4)
-               v2.AddArg3(x, y, v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (Sub64borrow x y c))
-       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
+}
+func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
        for {
-               if v_0.Op != OpSub64borrow {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64NEGQ)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v4.AddArg(c)
-               v3.AddArg(v4)
-               v2.AddArg3(x, y, v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (NEGLflags (MOVQconst [0])))
-       // result: (FlagEQ)
-       for {
-               if v_0.Op != OpAMD64NEGLflags {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 {
-                       break
-               }
-               v.reset(OpAMD64FlagEQ)
-               return true
-       }
-       // match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x))))
-       // result: x
-       for {
-               if v_0.Op != OpAMD64NEGLflags {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64SBBQcarrymask {
-                       break
-               }
-               x := v_0_0_0.Args[0]
-               v.copyOf(x)
-               return true
-       }
-       // match: (Select1 (AddTupleFirst32 _ tuple))
-       // result: (Select1 tuple)
-       for {
-               if v_0.Op != OpAMD64AddTupleFirst32 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               v.reset(OpSelect1)
-               v.AddArg(tuple)
-               return true
-       }
-       // match: (Select1 (AddTupleFirst64 _ tuple))
-       // result: (Select1 tuple)
-       for {
-               if v_0.Op != OpAMD64AddTupleFirst64 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               v.reset(OpSelect1)
-               v.AddArg(tuple)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ANDQlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicAnd64 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ANDLlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicAnd32 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ANDLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicOr64 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ORQlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicOr64 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ORQlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicOr32 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ORLlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicOr32 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ORLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       return false
 }
-func rewriteValueAMD64_OpSelectN(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       config := b.Func.Config
-       // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem)))))
-       // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)
-       // result: (Move [sc.Val64()] dst src mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 0 {
-                       break
-               }
-               call := v_0
-               if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 {
-                       break
-               }
-               sym := auxToCall(call.Aux)
-               s1 := call.Args[0]
-               if s1.Op != OpAMD64MOVQstoreconst {
-                       break
-               }
-               sc := auxIntToValAndOff(s1.AuxInt)
-               _ = s1.Args[1]
-               s2 := s1.Args[1]
-               if s2.Op != OpAMD64MOVQstore {
-                       break
-               }
-               _ = s2.Args[2]
-               src := s2.Args[1]
-               s3 := s2.Args[2]
-               if s3.Op != OpAMD64MOVQstore {
-                       break
-               }
-               mem := s3.Args[2]
-               dst := s3.Args[1]
-               if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(sc.Val64())
-               v.AddArg3(dst, src, mem)
-               return true
-       }
-       // match: (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem))
-       // cond: sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)
-       // result: (Move [sz] dst src mem)
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
        for {
-               if auxIntToInt64(v.AuxInt) != 0 {
-                       break
-               }
-               call := v_0
-               if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 {
-                       break
-               }
-               sym := auxToCall(call.Aux)
-               mem := call.Args[3]
-               dst := call.Args[0]
-               src := call.Args[1]
-               call_2 := call.Args[2]
-               if call_2.Op != OpAMD64MOVQconst {
-                       break
-               }
-               sz := auxIntToInt64(call_2.AuxInt)
-               if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(sz)
-               v.AddArg3(dst, src, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat32x16 x y)
-       // result: (VINSERTF64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat32x8 x y)
-       // result: (VINSERTF128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat64x4 x y)
-       // result: (VINSERTF128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat64x8 x y)
-       // result: (VINSERTF64X4512 [1] x y)
+       // match: (LessFloat32x4 x y)
+       // result: (VCMPPS128 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
+               v.reset(OpAMD64VCMPPS128)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt16x16 x y)
-       // result: (VINSERTI128256 [1] x y)
+       // match: (LessFloat32x8 x y)
+       // result: (VCMPPS256 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
+               v.reset(OpAMD64VCMPPS256)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt16x32 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       // match: (LessFloat64x2 x y)
+       // result: (VCMPPD128 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
+               v.reset(OpAMD64VCMPPD128)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt32x16 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       // match: (LessFloat64x4 x y)
+       // result: (VCMPPD256 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
+               v.reset(OpAMD64VCMPPD256)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt32x8 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpLessInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt64x4 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpLessInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt64x8 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpLessInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt8x32 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpLessInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt8x64 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpLessUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint16x16 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpLessUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint16x32 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpLessUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint32x16 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpLessUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint32x8 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoad(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (Load <t> ptr mem)
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (MOVQload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is32BitInt(t)
+       // result: (MOVLload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is16BitInt(t)
+       // result: (MOVWload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: (t.IsBoolean() || is8BitInt(t))
+       // result: (MOVBload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.IsBoolean() || is8BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is32BitFloat(t)
+       // result: (MOVSSload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is32BitFloat(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is64BitFloat(t)
+       // result: (MOVSDload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is64BitFloat(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: t.Size() == 16
+       // result: (VMOVDQUload128 ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUload128)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: t.Size() == 32
+       // result: (VMOVDQUload256 ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUload256)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: t.Size() == 64
+       // result: (VMOVDQUload512 ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUload512)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask16x16 <t> ptr mem)
+       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec16x16)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask16x32 <t> ptr mem)
+       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask16x8 <t> ptr mem)
+       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec16x8)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask32x16 <t> ptr mem)
+       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask32x4 <t> ptr mem)
+       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec32x4)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask32x8 <t> ptr mem)
+       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec32x8)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask64x2 <t> ptr mem)
+       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec64x2)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint64x4 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       // match: (LoadMask64x4 <t> ptr mem)
+       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec64x4)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint64x8 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       // match: (LoadMask64x8 <t> ptr mem)
+       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint8x32 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       // match: (LoadMask8x16 <t> ptr mem)
+       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec8x16)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint8x64 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       // match: (LoadMask8x32 <t> ptr mem)
+       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec8x32)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoFloat32x16 x y)
-       // result: (VINSERTF64X4512 [0] x y)
+       b := v.Block
+       // match: (LoadMask8x64 <t> ptr mem)
+       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoFloat32x8 x y)
-       // result: (VINSERTF128256 [0] x y)
+       b := v.Block
+       // match: (LoadMasked16 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK16load512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK16load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoFloat64x4 x y)
-       // result: (VINSERTF128256 [0] x y)
+       b := v.Block
+       // match: (LoadMasked32 <t> ptr mask mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK32load128 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32load128)
+               v.AddArg3(ptr, mask, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoFloat64x8 x y)
-       // result: (VINSERTF64X4512 [0] x y)
+       // match: (LoadMasked32 <t> ptr mask mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK32load256 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32load256)
+               v.AddArg3(ptr, mask, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt16x16 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (LoadMasked32 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK32load512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked64(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt16x32 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (LoadMasked64 <t> ptr mask mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK64load128 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64load128)
+               v.AddArg3(ptr, mask, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt32x16 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (LoadMasked64 <t> ptr mask mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK64load256 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64load256)
+               v.AddArg3(ptr, mask, mem)
+               return true
+       }
+       // match: (LoadMasked64 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt32x8 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       // match: (LoadMasked8 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK8load512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK8load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt64x4 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LocalAddr <t> {sym} base mem)
+       // cond: t.Elem().HasPointers()
+       // result: (LEAQ {sym} (SPanchored base mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               sym := auxToSym(v.Aux)
+               base := v_0
+               mem := v_1
+               if !(t.Elem().HasPointers()) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ)
+               v.Aux = symToAux(sym)
+               v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+               v0.AddArg2(base, mem)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (LocalAddr <t> {sym} base _)
+       // cond: !t.Elem().HasPointers()
+       // result: (LEAQ {sym} base)
+       for {
+               t := v.Type
+               sym := auxToSym(v.Aux)
+               base := v_0
+               if !(!t.Elem().HasPointers()) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ)
+               v.Aux = symToAux(sym)
+               v.AddArg(base)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt64x8 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh16x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt8x32 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (Lsh16x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt8x64 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh16x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint16x16 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (Lsh16x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint16x32 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh16x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint32x16 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (Lsh16x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint32x8 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       // match: (Lsh16x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint64x4 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (Lsh16x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpLsh32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint64x8 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh32x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
+               return true
+       }
+       // match: (Lsh32x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpLsh32x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint8x32 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       // match: (Lsh32x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint8x64 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (Lsh32x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh32x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask)
-       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Lsh32x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask)
-       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Lsh32x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask)
-       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Lsh32x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask)
-       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Lsh32x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask)
-       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Lsh64x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask)
-       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Lsh64x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask)
-       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Lsh64x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask)
-       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Lsh64x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask)
-       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Lsh64x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask)
-       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Lsh64x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask)
-       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Lsh64x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask)
-       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Lsh64x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask)
-       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Lsh8x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask)
-       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Lsh8x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask)
-       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Lsh8x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask)
-       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Lsh8x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask)
-       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Lsh8x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask)
-       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Lsh8x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x16 x y mask)
-       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Lsh8x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x32 x y mask)
-       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Lsh8x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMax32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x8 x y mask)
-       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Max32F <t> x y)
+       // result: (Neg32F <t> (Min32F <t> (Neg32F <t> x) (Neg32F <t> y)))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpNeg32F)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpMin32F, t)
+               v1 := b.NewValue0(v.Pos, OpNeg32F, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpNeg32F, t)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMax64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x16 x y mask)
-       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Max64F <t> x y)
+       // result: (Neg64F <t> (Min64F <t> (Neg64F <t> x) (Neg64F <t> y)))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpNeg64F)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpMin64F, t)
+               v1 := b.NewValue0(v.Pos, OpNeg64F, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpNeg64F, t)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMin32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x4 x y mask)
-       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Min32F <t> x y)
+       // result: (POR (MINSS <t> (MINSS <t> x y) x) (MINSS <t> x y))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
+               v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
+               v1.AddArg2(x, y)
+               v0.AddArg2(v1, x)
+               v.AddArg2(v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMin64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x8 x y mask)
-       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Min64F <t> x y)
+       // result: (POR (MINSD <t> (MINSD <t> x y) x) (MINSD <t> x y))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
+               v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
+               v1.AddArg2(x, y)
+               v0.AddArg2(v1, x)
+               v.AddArg2(v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x2 x y mask)
-       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod16 [a] x y)
+       // result: (Select1 (DIVW [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod16u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x4 x y mask)
-       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod16u x y)
+       // result: (Select1 (DIVWU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x8 x y mask)
-       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod32 [a] x y)
+       // result: (Select1 (DIVL [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod32u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x16 x y mask)
-       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod32u x y)
+       // result: (Select1 (DIVLU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x32 x y mask)
-       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod64 [a] x y)
+       // result: (Select1 (DIVQ [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod64u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x8 x y mask)
-       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod64u x y)
+       // result: (Select1 (DIVQU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x16 x y mask)
-       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod8 x y)
+       // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod8u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x4 x y mask)
-       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod8u x y)
+       // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpMove(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x8 x y mask)
-       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Move [0] _ _ mem)
+       // result: mem
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               mem := v_2
+               v.copyOf(mem)
+               return true
+       }
+       // match: (Move [1] dst src mem)
+       // result: (MOVBstore dst (MOVBload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 1 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [2] dst src mem)
+       // result: (MOVWstore dst (MOVWload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 2 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [4] dst src mem)
+       // result: (MOVLstore dst (MOVLload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 4 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [8] dst src mem)
+       // result: (MOVQstore dst (MOVQload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [16] dst src mem)
+       // result: (MOVOstore dst (MOVOload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 16 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVOstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [3] dst src mem)
+       // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 3 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = int32ToAuxInt(2)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AuxInt = int32ToAuxInt(2)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [5] dst src mem)
+       // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 5 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = int32ToAuxInt(4)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AuxInt = int32ToAuxInt(4)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [6] dst src mem)
+       // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 6 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(4)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AuxInt = int32ToAuxInt(4)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [7] dst src mem)
+       // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 7 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(3)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(3)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [9] dst src mem)
+       // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 9 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = int32ToAuxInt(8)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AuxInt = int32ToAuxInt(8)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [10] dst src mem)
+       // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 10 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(8)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AuxInt = int32ToAuxInt(8)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [11] dst src mem)
+       // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 11 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(7)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(7)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [12] dst src mem)
+       // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 12 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(8)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(8)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s >= 13 && s <= 15
+       // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s >= 13 && s <= 15) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = int32ToAuxInt(int32(s - 8))
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v0.AuxInt = int32ToAuxInt(int32(s - 8))
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s > 16 && s < 192 && logLargeCopy(v, s)
+       // result: (LoweredMove [s] dst src mem)
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s > 16 && s < 192 && logLargeCopy(v, s)) {
+                       break
+               }
+               v.reset(OpAMD64LoweredMove)
+               v.AuxInt = int64ToAuxInt(s)
+               v.AddArg3(dst, src, mem)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)
+       // result: (LoweredMoveLoop [s] dst src mem)
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)) {
+                       break
+               }
+               v.reset(OpAMD64LoweredMoveLoop)
+               v.AuxInt = int64ToAuxInt(s)
+               v.AddArg3(dst, src, mem)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s > repMoveThreshold && s%8 != 0
+       // result: (Move [s-s%8] (OffPtr <dst.Type> dst [s%8]) (OffPtr <src.Type> src [s%8]) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s > repMoveThreshold && s%8 != 0) {
+                       break
+               }
+               v.reset(OpMove)
+               v.AuxInt = int64ToAuxInt(s - s%8)
+               v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
+               v0.AuxInt = int64ToAuxInt(s % 8)
+               v0.AddArg(dst)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
+               v1.AuxInt = int64ToAuxInt(s % 8)
+               v1.AddArg(src)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v3.AddArg2(src, mem)
+               v2.AddArg3(dst, v3, mem)
+               v.AddArg3(v0, v1, v2)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)
+       // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem)
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)) {
+                       break
+               }
+               v.reset(OpAMD64REPMOVSQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(s / 8)
+               v.AddArg4(dst, src, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpNeg32F(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x2 x y mask)
-       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Neg32F x)
+       // result: (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64PXOR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32)
+               v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1)))
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpNeg64F(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x4 x y mask)
-       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Neg64F x)
+       // result: (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64PXOR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64)
+               v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1))
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x8 x y mask)
-       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Neq16 x y)
+       // result: (SETNE (CMPW x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
-       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Neq32 x y)
+       // result: (SETNE (CMPL x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
-       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Neq32F x y)
+       // result: (SETNEF (UCOMISS x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
-       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Neq64 x y)
+       // result: (SETNE (CMPQ x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask)
-       // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Neq64F x y)
+       // result: (SETNEF (UCOMISD x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask)
-       // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Neq8 x y)
+       // result: (SETNE (CMPB x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeqB(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask)
-       // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (NeqB x y)
+       // result: (SETNE (CMPB x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeqPtr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask)
-       // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (NeqPtr x y)
+       // result: (SETNE (CMPQ x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpNot(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask)
-       // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Not x)
+       // result: (XORLconst [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = int32ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask)
-       // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask)
-       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat32x4 x y)
+       // result: (VCMPPS128 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask)
-       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat32x8 x y)
+       // result: (VCMPPS256 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask)
-       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat64x2 x y)
+       // result: (VCMPPD128 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask)
-       // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat64x4 x y)
+       // result: (VCMPPD256 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask)
-       // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask)
-       // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask)
-       // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x4 [a] x y mask)
-       // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask)
-       // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x16 x y mask)
-       // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x32 x y mask)
-       // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x8 x y mask)
-       // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x16 x y mask)
-       // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpOffPtr(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x4 x y mask)
-       // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (OffPtr [off] ptr)
+       // cond: is32Bit(off)
+       // result: (ADDQconst [int32(off)] ptr)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               off := auxIntToInt64(v.AuxInt)
+               ptr := v_0
+               if !(is32Bit(off)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = int32ToAuxInt(int32(off))
+               v.AddArg(ptr)
+               return true
+       }
+       // match: (OffPtr [off] ptr)
+       // result: (ADDQ (MOVQconst [off]) ptr)
+       for {
+               off := auxIntToInt64(v.AuxInt)
+               ptr := v_0
+               v.reset(OpAMD64ADDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(off)
+               v.AddArg2(v0, ptr)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpPopCount16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x8 x y mask)
-       // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (PopCount16 x)
+       // result: (POPCNTL (MOVWQZX <typ.UInt32> x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POPCNTL)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpPopCount8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt64x2 x y mask)
-       // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (PopCount8 x)
+       // result: (POPCNTL (MOVBQZX <typ.UInt32> x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POPCNTL)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedInt64x4 x y mask)
-       // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (RoundToEven x)
+       // result: (ROUNDSD [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedInt64x8 x y mask)
-       // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat32x4 x)
+       // result: (VROUNDPS128 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint16x16 x y mask)
-       // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat32x8 x)
+       // result: (VROUNDPS256 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint16x32 x y mask)
-       // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat64x2 x)
+       // result: (VROUNDPD128 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint16x8 x y mask)
-       // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat64x4 x)
+       // result: (VROUNDPD256 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint32x16 x y mask)
-       // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint32x4 x y mask)
-       // result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint32x8 x y mask)
-       // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint64x2 x y mask)
-       // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint64x4 x y mask)
-       // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint64x8 x y mask)
-       // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt16x16 x y z mask)
-       // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt16x32 x y z mask)
-       // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt16x8 x y z mask)
-       // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt32x16 x y z mask)
-       // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt32x4 x y z mask)
-       // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt32x8 x y z mask)
-       // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedInt64x2 x y z mask)
-       // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt64x4 x y z mask)
-       // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedInt64x8 x y z mask)
-       // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint16x16 x y z mask)
-       // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint16x32 x y z mask)
-       // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint16x8 x y z mask)
-       // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint32x16 x y z mask)
-       // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint32x4 x y z mask)
-       // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint32x8 x y z mask)
-       // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh16x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint64x2 x y z mask)
-       // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh16x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint64x4 x y z mask)
-       // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh16x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint64x8 x y z mask)
-       // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh16x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt16x16 x y mask)
-       // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh16x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt16x32 x y mask)
-       // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh16x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt16x8 x y mask)
-       // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh16x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt32x16 x y mask)
-       // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh16x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt32x4 x y mask)
-       // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt32x8 x y mask)
-       // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt64x2 x y mask)
-       // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt64x4 x y mask)
-       // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt64x8 x y mask)
-       // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint16x16 x y mask)
-       // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint16x32 x y mask)
-       // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint16x8 x y mask)
-       // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint32x16 x y mask)
-       // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh32x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint32x4 x y mask)
-       // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh32x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint32x8 x y mask)
-       // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh32x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint64x2 x y mask)
-       // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh32x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint64x4 x y mask)
-       // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh32x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint64x8 x y mask)
-       // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh32x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt16x16 x y z mask)
-       // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh32x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt16x32 x y z mask)
-       // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh32x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt16x8 x y z mask)
-       // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt32x16 x y z mask)
-       // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt32x4 x y z mask)
-       // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt32x8 x y z mask)
-       // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt64x2 x y z mask)
-       // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt64x4 x y z mask)
-       // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt64x8 x y z mask)
-       // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint16x16 x y z mask)
-       // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint16x32 x y z mask)
-       // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh64x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint16x8 x y z mask)
-       // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh64x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint32x16 x y z mask)
-       // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh64x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint32x4 x y z mask)
-       // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh64x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint32x8 x y z mask)
-       // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh64x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint64x2 x y z mask)
-       // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh64x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint64x4 x y z mask)
-       // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh64x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint64x8 x y z mask)
-       // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh64x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt16x16 x y mask)
-       // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt16x32 x y mask)
-       // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt16x8 x y mask)
-       // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt32x16 x y mask)
-       // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt32x4 x y mask)
-       // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt32x8 x y mask)
-       // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt64x2 x y mask)
-       // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt64x4 x y mask)
-       // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt64x8 x y mask)
-       // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh8x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint16x16 x y mask)
-       // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh8x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint16x32 x y mask)
-       // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh8x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint16x8 x y mask)
-       // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh8x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint32x16 x y mask)
-       // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh8x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint32x4 x y mask)
-       // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh8x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint32x8 x y mask)
-       // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh8x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint64x2 x y mask)
-       // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh8x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSelect0(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint64x4 x y mask)
-       // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Select0 (Mul64uover x y))
+       // result: (Select0 <typ.UInt64> (MULQU x y))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_0.Op != OpMul64uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint64x8 x y mask)
-       // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Select0 (Mul32uover x y))
+       // result: (Select0 <typ.UInt32> (MULLU x y))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_0.Op != OpMul32uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt32
+               v0 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSlicemask(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Slicemask <t> x)
-       // result: (SARQconst (NEGQ <t> x) [63])
+       // match: (Select0 (Add64carry x y c))
+       // result: (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
        for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64SARQconst)
-               v.AuxInt = int8ToAuxInt(63)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(x)
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v2.AddArg(c)
+               v1.AddArg(v2)
+               v0.AddArg3(x, y, v1)
                v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSpectreIndex(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (SpectreIndex <t> x y)
-       // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
+       // match: (Select0 (Sub64borrow x y c))
+       // result: (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64CMOVQCC)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v1.AddArg2(x, y)
-               v.AddArg3(x, v0, v1)
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v2.AddArg(c)
+               v1.AddArg(v2)
+               v0.AddArg3(x, y, v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (SpectreSliceIndex <t> x y)
-       // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y))
+       // match: (Select0 <t> (AddTupleFirst32 val tuple))
+       // result: (ADDL val (Select0 <t> tuple))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64CMOVQHI)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v1.AddArg2(x, y)
-               v.AddArg3(x, v0, v1)
+               t := v.Type
+               if v_0.Op != OpAMD64AddTupleFirst32 {
+                       break
+               }
+               tuple := v_0.Args[1]
+               val := v_0.Args[0]
+               v.reset(OpAMD64ADDL)
+               v0 := b.NewValue0(v.Pos, OpSelect0, t)
+               v0.AddArg(tuple)
+               v.AddArg2(val, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat32x16 x mask)
-       // result: (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Select0 <t> (AddTupleFirst64 val tuple))
+       // result: (ADDQ val (Select0 <t> tuple))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               t := v.Type
+               if v_0.Op != OpAMD64AddTupleFirst64 {
+                       break
+               }
+               tuple := v_0.Args[1]
+               val := v_0.Args[0]
+               v.reset(OpAMD64ADDQ)
+               v0 := b.NewValue0(v.Pos, OpSelect0, t)
+               v0.AddArg(tuple)
+               v.AddArg2(val, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat32x4 x mask)
-       // result: (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Select0 a:(ADDQconstflags [c] x))
+       // cond: a.Uses == 1
+       // result: (ADDQconst [c] x)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               a := v_0
+               if a.Op != OpAMD64ADDQconstflags {
+                       break
+               }
+               c := auxIntToInt32(a.AuxInt)
+               x := a.Args[0]
+               if !(a.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = int32ToAuxInt(c)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat32x8 x mask)
-       // result: (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Select0 a:(ADDLconstflags [c] x))
+       // cond: a.Uses == 1
+       // result: (ADDLconst [c] x)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               a := v_0
+               if a.Op != OpAMD64ADDLconstflags {
+                       break
+               }
+               c := auxIntToInt32(a.AuxInt)
+               x := a.Args[0]
+               if !(a.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64ADDLconst)
+               v.AuxInt = int32ToAuxInt(c)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSqrtMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSelect1(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SqrtMaskedFloat64x2 x mask)
-       // result: (VSQRTPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Select1 (Mul64uover x y))
+       // result: (SETO (Select1 <types.TypeFlags> (MULQU x y)))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               if v_0.Op != OpMul64uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETO)
+               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1.AddArg2(x, y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat64x4 x mask)
-       // result: (VSQRTPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Select1 (Mul32uover x y))
+       // result: (SETO (Select1 <types.TypeFlags> (MULLU x y)))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               if v_0.Op != OpMul32uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETO)
+               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v1.AddArg2(x, y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat64x8 x mask)
-       // result: (VSQRTPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Select1 (Add64carry x y c))
+       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64NEGQ)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v4.AddArg(c)
+               v3.AddArg(v4)
+               v2.AddArg3(x, y, v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpStore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 8 && t.IsFloat()
-       // result: (MOVSDstore ptr val mem)
+       // match: (Select1 (Sub64borrow x y c))
+       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 8 && t.IsFloat()) {
+               if v_0.Op != OpSub64borrow {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
-               v.AddArg3(ptr, val, mem)
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64NEGQ)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v4.AddArg(c)
+               v3.AddArg(v4)
+               v2.AddArg3(x, y, v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 4 && t.IsFloat()
-       // result: (MOVSSstore ptr val mem)
+       // match: (Select1 (NEGLflags (MOVQconst [0])))
+       // result: (FlagEQ)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 4 && t.IsFloat()) {
+               if v_0.Op != OpAMD64NEGLflags {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
-               v.AddArg3(ptr, val, mem)
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 {
+                       break
+               }
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 8 && !t.IsFloat()
-       // result: (MOVQstore ptr val mem)
+       // match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x))))
+       // result: x
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 8 && !t.IsFloat()) {
+               if v_0.Op != OpAMD64NEGLflags {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AddArg3(ptr, val, mem)
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64SBBQcarrymask {
+                       break
+               }
+               x := v_0_0_0.Args[0]
+               v.copyOf(x)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 4 && !t.IsFloat()
-       // result: (MOVLstore ptr val mem)
+       // match: (Select1 (AddTupleFirst32 _ tuple))
+       // result: (Select1 tuple)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 4 && !t.IsFloat()) {
+               if v_0.Op != OpAMD64AddTupleFirst32 {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AddArg3(ptr, val, mem)
+               tuple := v_0.Args[1]
+               v.reset(OpSelect1)
+               v.AddArg(tuple)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 2
-       // result: (MOVWstore ptr val mem)
+       // match: (Select1 (AddTupleFirst64 _ tuple))
+       // result: (Select1 tuple)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 2) {
+               if v_0.Op != OpAMD64AddTupleFirst64 {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AddArg3(ptr, val, mem)
+               tuple := v_0.Args[1]
+               v.reset(OpSelect1)
+               v.AddArg(tuple)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 1
-       // result: (MOVBstore ptr val mem)
+       // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ANDQlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 1) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicAnd64 {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 16
-       // result: (VMOVDQUstore128 ptr val mem)
+       // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ANDLlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 16) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicAnd32 {
                        break
                }
-               v.reset(OpAMD64VMOVDQUstore128)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 32
-       // result: (VMOVDQUstore256 ptr val mem)
+       // match: (Select1 a:(LoweredAtomicOr64 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ORQlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 32) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicOr64 {
                        break
                }
-               v.reset(OpAMD64VMOVDQUstore256)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ORQlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 64
-       // result: (VMOVDQUstore512 ptr val mem)
+       // match: (Select1 a:(LoweredAtomicOr32 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ORLlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicOr32 {
                        break
                }
-               v.reset(OpAMD64VMOVDQUstore512)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ORLlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSelectN(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (StoreMask16x16 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
+       config := b.Func.Config
+       // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem)))))
+       // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)
+       // result: (Move [sc.Val64()] dst src mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               call := v_0
+               if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 {
+                       break
+               }
+               sym := auxToCall(call.Aux)
+               s1 := call.Args[0]
+               if s1.Op != OpAMD64MOVQstoreconst {
+                       break
+               }
+               sc := auxIntToValAndOff(s1.AuxInt)
+               _ = s1.Args[1]
+               s2 := s1.Args[1]
+               if s2.Op != OpAMD64MOVQstore {
+                       break
+               }
+               _ = s2.Args[2]
+               src := s2.Args[1]
+               s3 := s2.Args[2]
+               if s3.Op != OpAMD64MOVQstore {
+                       break
+               }
+               mem := s3.Args[2]
+               dst := s3.Args[1]
+               if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) {
+                       break
+               }
+               v.reset(OpMove)
+               v.AuxInt = int64ToAuxInt(sc.Val64())
+               v.AddArg3(dst, src, mem)
+               return true
+       }
+       // match: (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem))
+       // cond: sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)
+       // result: (Move [sz] dst src mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               call := v_0
+               if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 {
+                       break
+               }
+               sym := auxToCall(call.Aux)
+               mem := call.Args[3]
+               dst := call.Args[0]
+               src := call.Args[1]
+               call_2 := call.Args[2]
+               if call_2.Op != OpAMD64MOVQconst {
+                       break
+               }
+               sz := auxIntToInt64(call_2.AuxInt)
+               if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) {
+                       break
+               }
+               v.reset(OpMove)
+               v.AuxInt = int64ToAuxInt(sz)
+               v.AddArg3(dst, src, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask16x32 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
+       // match: (SetHiFloat32x16 x y)
+       // result: (VINSERTF64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask16x8 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
+       // match: (SetHiFloat32x8 x y)
+       // result: (VINSERTF128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask32x16 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
+       // match: (SetHiFloat64x4 x y)
+       // result: (VINSERTF128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask32x4 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
+       // match: (SetHiFloat64x8 x y)
+       // result: (VINSERTF64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask32x8 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
+       // match: (SetHiInt16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask64x2 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
+       // match: (SetHiInt16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask64x4 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
+       // match: (SetHiInt32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask64x8 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
+       // match: (SetHiInt32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask8x16 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
+       // match: (SetHiInt64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask8x32 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
+       // match: (SetHiInt64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask8x64 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
+       // match: (SetHiInt8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked16 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK16store512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) val mem)
+       // match: (SetHiInt8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK16store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpStoreMasked32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked32 {t} ptr mask val mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK32store128 ptr mask val mem)
+       // match: (SetHiUint16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32store128)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked32 {t} ptr mask val mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK32store256 ptr mask val mem)
+}
+func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32store256)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked32 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK32store512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) val mem)
+}
+func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked64 {t} ptr mask val mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK64store128 ptr mask val mem)
+       // match: (SetHiUint32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64store128)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked64 {t} ptr mask val mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK64store256 ptr mask val mem)
+}
+func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64store256)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked64 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK64store512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) val mem)
+}
+func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpStoreMasked8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked8 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK8store512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) val mem)
+       // match: (SetHiUint8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK8store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat32x16 x y mask)
-       // result: (VSUBPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (SetHiUint8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat32x4 x y mask)
-       // result: (VSUBPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (SetLoFloat32x16 x y)
+       // result: (VINSERTF64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat32x8 x y mask)
-       // result: (VSUBPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (SetLoFloat32x8 x y)
+       // result: (VINSERTF128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat64x2 x y mask)
-       // result: (VSUBPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (SetLoFloat64x4 x y)
+       // result: (VINSERTF128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat64x4 x y mask)
-       // result: (VSUBPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (SetLoFloat64x8 x y)
+       // result: (VINSERTF64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat64x8 x y mask)
-       // result: (VSUBPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (SetLoInt16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt16x16 x y mask)
-       // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (SetLoInt16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt16x32 x y mask)
-       // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (SetLoInt32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt16x8 x y mask)
-       // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (SetLoInt32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt32x16 x y mask)
-       // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (SetLoInt64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt32x4 x y mask)
-       // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (SetLoInt64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt32x8 x y mask)
-       // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (SetLoInt8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt64x2 x y mask)
-       // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (SetLoInt8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt64x4 x y mask)
-       // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (SetLoUint16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt64x8 x y mask)
-       // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (SetLoUint16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt8x16 x y mask)
-       // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (SetLoUint32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt8x32 x y mask)
-       // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (SetLoUint32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt8x64 x y mask)
-       // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (SetLoUint64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint16x16 x y mask)
-       // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (SetLoUint64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint16x32 x y mask)
-       // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (SetLoUint8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint16x8 x y mask)
-       // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (SetLoUint8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSlicemask(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint32x16 x y mask)
-       // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Slicemask <t> x)
+       // result: (SARQconst (NEGQ <t> x) [63])
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SARQconst)
+               v.AuxInt = int8ToAuxInt(63)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSpectreIndex(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint32x4 x y mask)
-       // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (SpectreIndex <t> x y)
+       // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64CMOVQCC)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(0)
+               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v1.AddArg2(x, y)
+               v.AddArg3(x, v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint32x8 x y mask)
-       // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (SpectreSliceIndex <t> x y)
+       // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64CMOVQHI)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(0)
+               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v1.AddArg2(x, y)
+               v.AddArg3(x, v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpStore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint64x2 x y mask)
-       // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 8 && t.IsFloat()
+       // result: (MOVSDstore ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 8 && t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 4 && t.IsFloat()
+       // result: (MOVSSstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 4 && t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 8 && !t.IsFloat()
+       // result: (MOVQstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 8 && !t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 4 && !t.IsFloat()
+       // result: (MOVLstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 4 && !t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 2
+       // result: (MOVWstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 1
+       // result: (MOVBstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 16
+       // result: (VMOVDQUstore128 ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUstore128)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 32
+       // result: (VMOVDQUstore256 ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUstore256)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 64
+       // result: (VMOVDQUstore512 ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUstore512)
+               v.AddArg3(ptr, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint64x4 x y mask)
-       // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (StoreMask16x16 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint64x8 x y mask)
-       // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (StoreMask16x32 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint8x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint8x16 x y mask)
-       // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (StoreMask16x8 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint8x32 x y mask)
-       // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (StoreMask32x16 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint8x64 x y mask)
-       // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (StoreMask32x4 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt16x16 x y mask)
-       // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (StoreMask32x8 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt16x32 x y mask)
-       // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (StoreMask64x2 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt16x8 x y mask)
-       // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (StoreMask64x4 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt8x16 x y mask)
-       // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (StoreMask64x8 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt8x32 x y mask)
-       // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (StoreMask8x16 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt8x64 x y mask)
-       // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (StoreMask8x32 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint16x16 x y mask)
-       // result: (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (StoreMask8x64 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint16x32 x y mask)
-       // result: (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (StoreMasked16 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK16store512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSWMasked512)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK16store512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked32(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint16x8 x y mask)
-       // result: (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (StoreMasked32 {t} ptr mask val mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK32store128 ptr mask val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32store128)
+               v.AddArg4(ptr, mask, val, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubSaturatedMaskedUint8x16 x y mask)
-       // result: (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (StoreMasked32 {t} ptr mask val mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK32store256 ptr mask val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32store256)
+               v.AddArg4(ptr, mask, val, mem)
+               return true
+       }
+       // match: (StoreMasked32 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK32store512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32store512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint8x32 x y mask)
-       // result: (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (StoreMasked64 {t} ptr mask val mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK64store128 ptr mask val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64store128)
+               v.AddArg4(ptr, mask, val, mem)
+               return true
+       }
+       // match: (StoreMasked64 {t} ptr mask val mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK64store256 ptr mask val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64store256)
+               v.AddArg4(ptr, mask, val, mem)
+               return true
+       }
+       // match: (StoreMasked64 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK64store512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64store512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked8(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint8x64 x y mask)
-       // result: (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (StoreMasked8 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK8store512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSBMasked512)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK8store512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
 func rewriteValueAMD64_OpTrunc(v *Value) bool {
        v_0 := v.Args[0]
@@ -56567,114 +38729,6 @@ func rewriteValueAMD64_OpTruncScaledFloat64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
        // match: (TruncScaledResidueFloat32x16 [a] x)
@@ -56753,330 +38807,6 @@ func rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt32x16 x y mask)
-       // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt32x4 x y mask)
-       // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt32x8 x y mask)
-       // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt64x2 x y mask)
-       // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt64x4 x y mask)
-       // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt64x8 x y mask)
-       // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint32x16 x y mask)
-       // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint32x4 x y mask)
-       // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint32x8 x y mask)
-       // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint64x2 x y mask)
-       // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint64x4 x y mask)
-       // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint64x8 x y mask)
-       // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpZero(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 90149300b2ccdab29dacf34d3f7cc7e6e7c9c4ed..e6c6874bddc3b3156cb476db63f18a06afe64717 100644 (file)
@@ -24,18 +24,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int64x2.Abs", opLen1(ssa.OpAbsInt64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x4.Abs", opLen1(ssa.OpAbsInt64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int64x8.Abs", opLen1(ssa.OpAbsInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64)
@@ -69,51 +57,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int32x4.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x16.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.AddMasked", opLen3(ssa.OpAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.AddMasked", opLen3(ssa.OpAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.AddMasked", opLen3(ssa.OpAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddMasked", opLen3(ssa.OpAddMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddMasked", opLen3(ssa.OpAddMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddMasked", opLen3(ssa.OpAddMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.AddMasked", opLen3(ssa.OpAddMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.AddMasked", opLen3(ssa.OpAddMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.AddMasked", opLen3(ssa.OpAddMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AddMasked", opLen3(ssa.OpAddMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AddMasked", opLen3(ssa.OpAddMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AddMasked", opLen3(ssa.OpAddMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AddMasked", opLen3(ssa.OpAddMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AddMasked", opLen3(ssa.OpAddMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AddMasked", opLen3(ssa.OpAddMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AddMasked", opLen3(ssa.OpAddMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AddMasked", opLen3(ssa.OpAddMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.AddMasked", opLen3(ssa.OpAddMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.AddMasked", opLen3(ssa.OpAddMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.AddMasked", opLen3(ssa.OpAddMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.AddMasked", opLen3(ssa.OpAddMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.AddMasked", opLen3(ssa.OpAddMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.AddMasked", opLen3(ssa.OpAddMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.AddMasked", opLen3(ssa.OpAddMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.AddMasked", opLen3(ssa.OpAddMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.AddMasked", opLen3(ssa.OpAddMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.AddMasked", opLen3(ssa.OpAddMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64)
@@ -140,18 +89,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64)
@@ -180,18 +117,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.And", opLen2(ssa.OpAndUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.And", opLen2(ssa.OpAndUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.And", opLen2(ssa.OpAndUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AndMasked", opLen3(ssa.OpAndMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AndMasked", opLen3(ssa.OpAndMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AndMasked", opLen3(ssa.OpAndMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AndMasked", opLen3(ssa.OpAndMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AndMasked", opLen3(ssa.OpAndMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AndMasked", opLen3(ssa.OpAndMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.AndMasked", opLen3(ssa.OpAndMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.AndMasked", opLen3(ssa.OpAndMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.AndMasked", opLen3(ssa.OpAndMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.AndMasked", opLen3(ssa.OpAndMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.AndMasked", opLen3(ssa.OpAndMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.AndNot", opLen2_21(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.AndNot", opLen2_21(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.AndNot", opLen2_21(ssa.OpAndNotInt8x64, types.TypeVec512), sys.AMD64)
@@ -216,30 +141,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.AndNot", opLen2_21(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.AndNot", opLen2_21(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.AndNot", opLen2_21(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
@@ -250,16 +157,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
@@ -270,16 +167,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt8x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt16x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint8x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint16x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint64x2, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
@@ -290,16 +177,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt8x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt16x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint8x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint16x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint64x2, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
@@ -310,24 +187,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64)
@@ -361,15 +226,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)
@@ -382,24 +241,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.DivMasked", opLen3(ssa.OpDivMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.DivMasked", opLen3(ssa.OpDivMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.DivMasked", opLen3(ssa.OpDivMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x16.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
@@ -430,36 +277,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Expand", opLen2(ssa.OpExpandFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Expand", opLen2(ssa.OpExpandFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Expand", opLen2(ssa.OpExpandFloat32x16, types.TypeVec512), sys.AMD64)
@@ -500,42 +317,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x16, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x32, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x64, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x16, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x32, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x64, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x16, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x32, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x64, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.GetElem", opLen1Imm8(ssa.OpGetElemFloat32x4, types.Types[types.TFLOAT32], 0), sys.AMD64)
        addF(simdPackage, "Float64x2.GetElem", opLen1Imm8(ssa.OpGetElemFloat64x2, types.Types[types.TFLOAT64], 0), sys.AMD64)
        addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
@@ -622,78 +418,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64)
@@ -722,66 +452,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.LessMasked", opLen3(ssa.OpLessMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.LessMasked", opLen3(ssa.OpLessMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.LessMasked", opLen3(ssa.OpLessMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.LessMasked", opLen3(ssa.OpLessMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.LessMasked", opLen3(ssa.OpLessMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.LessMasked", opLen3(ssa.OpLessMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.LessMasked", opLen3(ssa.OpLessMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.LessMasked", opLen3(ssa.OpLessMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.LessMasked", opLen3(ssa.OpLessMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.LessMasked", opLen3(ssa.OpLessMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.LessMasked", opLen3(ssa.OpLessMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.LessMasked", opLen3(ssa.OpLessMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.LessMasked", opLen3(ssa.OpLessMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.LessMasked", opLen3(ssa.OpLessMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.LessMasked", opLen3(ssa.OpLessMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.LessMasked", opLen3(ssa.OpLessMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.LessMasked", opLen3(ssa.OpLessMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.LessMasked", opLen3(ssa.OpLessMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.LessMasked", opLen3(ssa.OpLessMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.LessMasked", opLen3(ssa.OpLessMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.LessMasked", opLen3(ssa.OpLessMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.LessMasked", opLen3(ssa.OpLessMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.LessMasked", opLen3(ssa.OpLessMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.LessMasked", opLen3(ssa.OpLessMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.LessMasked", opLen3(ssa.OpLessMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.LessMasked", opLen3(ssa.OpLessMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.LessMasked", opLen3(ssa.OpLessMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.LessMasked", opLen3(ssa.OpLessMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.LessMasked", opLen3(ssa.OpLessMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.LessMasked", opLen3(ssa.OpLessMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Max", opLen2(ssa.OpMaxFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Max", opLen2(ssa.OpMaxFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Max", opLen2(ssa.OpMaxFloat32x16, types.TypeVec512), sys.AMD64)
@@ -812,36 +482,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Max", opLen2(ssa.OpMaxUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Max", opLen2(ssa.OpMaxUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Max", opLen2(ssa.OpMaxUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Min", opLen2(ssa.OpMinFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Min", opLen2(ssa.OpMinFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Min", opLen2(ssa.OpMinFloat32x16, types.TypeVec512), sys.AMD64)
@@ -872,36 +512,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Min", opLen2(ssa.OpMinUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Min", opLen2(ssa.OpMinUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Min", opLen2(ssa.OpMinUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MinMasked", opLen3(ssa.OpMinMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MinMasked", opLen3(ssa.OpMinMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MinMasked", opLen3(ssa.OpMinMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MinMasked", opLen3(ssa.OpMinMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MinMasked", opLen3(ssa.OpMinMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MinMasked", opLen3(ssa.OpMinMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.MinMasked", opLen3(ssa.OpMinMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.MinMasked", opLen3(ssa.OpMinMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.MinMasked", opLen3(ssa.OpMinMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MinMasked", opLen3(ssa.OpMinMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MinMasked", opLen3(ssa.OpMinMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MinMasked", opLen3(ssa.OpMinMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.MinMasked", opLen3(ssa.OpMinMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.MinMasked", opLen3(ssa.OpMinMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.MinMasked", opLen3(ssa.OpMinMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.MinMasked", opLen3(ssa.OpMinMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.MinMasked", opLen3(ssa.OpMinMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.MinMasked", opLen3(ssa.OpMinMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MinMasked", opLen3(ssa.OpMinMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MinMasked", opLen3(ssa.OpMinMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MinMasked", opLen3(ssa.OpMinMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MinMasked", opLen3(ssa.OpMinMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MinMasked", opLen3(ssa.OpMinMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MinMasked", opLen3(ssa.OpMinMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MinMasked", opLen3(ssa.OpMinMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MinMasked", opLen3(ssa.OpMinMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MinMasked", opLen3(ssa.OpMinMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MinMasked", opLen3(ssa.OpMinMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MinMasked", opLen3(ssa.OpMinMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MinMasked", opLen3(ssa.OpMinMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Mul", opLen2(ssa.OpMulFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Mul", opLen2(ssa.OpMulFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Mul", opLen2(ssa.OpMulFloat32x16, types.TypeVec512), sys.AMD64)
@@ -932,24 +542,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.MulAdd", opLen3(ssa.OpMulAddFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.MulAdd", opLen3(ssa.OpMulAddFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MulAdd", opLen3(ssa.OpMulAddFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64)
@@ -960,48 +558,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulMasked", opLen3(ssa.OpMulMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulMasked", opLen3(ssa.OpMulMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulMasked", opLen3(ssa.OpMulMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MulMasked", opLen3(ssa.OpMulMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MulMasked", opLen3(ssa.OpMulMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MulMasked", opLen3(ssa.OpMulMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.MulMasked", opLen3(ssa.OpMulMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.MulMasked", opLen3(ssa.OpMulMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.MulMasked", opLen3(ssa.OpMulMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MulMasked", opLen3(ssa.OpMulMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MulMasked", opLen3(ssa.OpMulMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MulMasked", opLen3(ssa.OpMulMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MulMasked", opLen3(ssa.OpMulMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MulMasked", opLen3(ssa.OpMulMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MulMasked", opLen3(ssa.OpMulMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MulMasked", opLen3(ssa.OpMulMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MulMasked", opLen3(ssa.OpMulMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MulMasked", opLen3(ssa.OpMulMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
@@ -1016,36 +578,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.OnesCount", opLen1(ssa.OpOnesCountInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.OnesCount", opLen1(ssa.OpOnesCountInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.OnesCount", opLen1(ssa.OpOnesCountInt8x64, types.TypeVec512), sys.AMD64)
@@ -1070,30 +602,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.OnesCount", opLen1(ssa.OpOnesCountUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.OnesCount", opLen1(ssa.OpOnesCountUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.OnesCount", opLen1(ssa.OpOnesCountUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64)
@@ -1118,18 +626,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.OrMasked", opLen3(ssa.OpOrMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.OrMasked", opLen3(ssa.OpOrMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.OrMasked", opLen3(ssa.OpOrMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.OrMasked", opLen3(ssa.OpOrMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.OrMasked", opLen3(ssa.OpOrMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.OrMasked", opLen3(ssa.OpOrMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.OrMasked", opLen3(ssa.OpOrMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.OrMasked", opLen3(ssa.OpOrMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.OrMasked", opLen3(ssa.OpOrMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
@@ -1184,84 +680,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x8.Permute2", opLen3_231(ssa.OpPermute2Float64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int64x8.Permute2", opLen3_231(ssa.OpPermute2Int64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.Permute2", opLen3_231(ssa.OpPermute2Uint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Reciprocal", opLen1(ssa.OpReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Reciprocal", opLen1(ssa.OpReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Reciprocal", opLen1(ssa.OpReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64)
@@ -1274,18 +704,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64)
@@ -1298,18 +716,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateLeft", opLen2(ssa.OpRotateLeftInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateLeft", opLen2(ssa.OpRotateLeftInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateLeft", opLen2(ssa.OpRotateLeftInt32x16, types.TypeVec512), sys.AMD64)
@@ -1322,18 +728,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateLeft", opLen2(ssa.OpRotateLeftUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateLeft", opLen2(ssa.OpRotateLeftUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateLeft", opLen2(ssa.OpRotateLeftUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateRight", opLen2(ssa.OpRotateRightInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateRight", opLen2(ssa.OpRotateRightInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateRight", opLen2(ssa.OpRotateRightInt32x16, types.TypeVec512), sys.AMD64)
@@ -1346,18 +740,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateRight", opLen2(ssa.OpRotateRightUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateRight", opLen2(ssa.OpRotateRightUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateRight", opLen2(ssa.OpRotateRightUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x2, types.TypeVec128), sys.AMD64)
@@ -1368,36 +750,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
@@ -1484,42 +848,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x32, types.TypeVec512), sys.AMD64)
@@ -1556,42 +884,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64)
@@ -1628,42 +920,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.ShiftRight", opLen2(ssa.OpShiftRightInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.ShiftRight", opLen2(ssa.OpShiftRightInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.ShiftRight", opLen2(ssa.OpShiftRightInt16x32, types.TypeVec512), sys.AMD64)
@@ -1700,54 +956,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64)
@@ -1778,36 +992,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Sub", opLen2(ssa.OpSubUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.SubMasked", opLen3(ssa.OpSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.SubMasked", opLen3(ssa.OpSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.SubMasked", opLen3(ssa.OpSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.SubMasked", opLen3(ssa.OpSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.SubMasked", opLen3(ssa.OpSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.SubMasked", opLen3(ssa.OpSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.SubMasked", opLen3(ssa.OpSubMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.SubMasked", opLen3(ssa.OpSubMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.SubMasked", opLen3(ssa.OpSubMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.SubMasked", opLen3(ssa.OpSubMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.SubMasked", opLen3(ssa.OpSubMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.SubMasked", opLen3(ssa.OpSubMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.SubMasked", opLen3(ssa.OpSubMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.SubMasked", opLen3(ssa.OpSubMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.SubMasked", opLen3(ssa.OpSubMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.SubMasked", opLen3(ssa.OpSubMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.SubMasked", opLen3(ssa.OpSubMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.SubMasked", opLen3(ssa.OpSubMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.SubMasked", opLen3(ssa.OpSubMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.SubMasked", opLen3(ssa.OpSubMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.SubMasked", opLen3(ssa.OpSubMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.SubMasked", opLen3(ssa.OpSubMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.SubMasked", opLen3(ssa.OpSubMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.SubMasked", opLen3(ssa.OpSubMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.SubMasked", opLen3(ssa.OpSubMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.SubMasked", opLen3(ssa.OpSubMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.SubMasked", opLen3(ssa.OpSubMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.SubMasked", opLen3(ssa.OpSubMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.SubMasked", opLen3(ssa.OpSubMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.SubMasked", opLen3(ssa.OpSubMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64)
@@ -1834,18 +1018,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64)
@@ -1856,24 +1028,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64)
@@ -1898,18 +1058,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Xor", opLen2(ssa.OpXorUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Xor", opLen2(ssa.OpXorUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Xor", opLen2(ssa.OpXorUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.XorMasked", opLen3(ssa.OpXorMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.XorMasked", opLen3(ssa.OpXorMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.XorMasked", opLen3(ssa.OpXorMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.XorMasked", opLen3(ssa.OpXorMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.XorMasked", opLen3(ssa.OpXorMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.XorMasked", opLen3(ssa.OpXorMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.XorMasked", opLen3(ssa.OpXorMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.XorMasked", opLen3(ssa.OpXorMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.XorMasked", opLen3(ssa.OpXorMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.XorMasked", opLen3(ssa.OpXorMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.XorMasked", opLen3(ssa.OpXorMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.XorMasked", opLen3(ssa.OpXorMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.blend", opLen3(ssa.OpblendInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.blend", opLen3(ssa.OpblendInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.blendMasked", opLen3(ssa.OpblendMaskedInt8x64, types.TypeVec512), sys.AMD64)
index 22decb9d7e69c9dbaaebdbd5d47b838ae273aafc..4044addd8c16ad6ec8f8e5ead86d8a7d6b72d63a 100644 (file)
@@ -11,6 +11,7 @@ import (
        "slices"
        "strconv"
        "strings"
+       "unicode"
 
        "simd/_gen/unify"
 )
@@ -100,6 +101,11 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
        o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go)
        if isMasked {
                o.Documentation += "\n//\n// This operation is applied selectively under a write mask."
+               if unicode.IsUpper([]rune(o.Go)[0]) {
+                       trueVal := "true"
+                       o.NoGenericOps = &trueVal
+                       o.NoTypes = &trueVal
+               }
        }
 
        o.In = append(o.rawOperation.In, o.rawOperation.InVariant...)
index 7fd20cf5d79f2ae572dc3616d16da35313895687..f8526d27e989d70574a18618fbd7f626dd7f638e 100644 (file)
@@ -15,44 +15,6 @@ import (
 // from > and =
 var comparisonFixed bool = simd.HasAVX512()
 
-func TestLessMasked(t *testing.T) {
-       if simd.HasAVX512() {
-               testFloat32x4CompareMasked(t, simd.Float32x4.LessMasked, lessSlice[float32])
-               testFloat32x8CompareMasked(t, simd.Float32x8.LessMasked, lessSlice[float32])
-               testFloat64x2CompareMasked(t, simd.Float64x2.LessMasked, lessSlice[float64])
-               testFloat64x4CompareMasked(t, simd.Float64x4.LessMasked, lessSlice[float64])
-
-               testInt16x16CompareMasked(t, simd.Int16x16.LessMasked, lessSlice[int16])
-               testInt16x8CompareMasked(t, simd.Int16x8.LessMasked, lessSlice[int16])
-               testInt32x4CompareMasked(t, simd.Int32x4.LessMasked, lessSlice[int32])
-               testInt32x8CompareMasked(t, simd.Int32x8.LessMasked, lessSlice[int32])
-               testInt64x2CompareMasked(t, simd.Int64x2.LessMasked, lessSlice[int64])
-               testInt64x4CompareMasked(t, simd.Int64x4.LessMasked, lessSlice[int64])
-               testInt8x16CompareMasked(t, simd.Int8x16.LessMasked, lessSlice[int8])
-               testInt8x32CompareMasked(t, simd.Int8x32.LessMasked, lessSlice[int8])
-
-               testUint16x16CompareMasked(t, simd.Uint16x16.LessMasked, lessSlice[uint16])
-               testUint16x8CompareMasked(t, simd.Uint16x8.LessMasked, lessSlice[uint16])
-               testUint32x4CompareMasked(t, simd.Uint32x4.LessMasked, lessSlice[uint32])
-               testUint32x8CompareMasked(t, simd.Uint32x8.LessMasked, lessSlice[uint32])
-               testUint64x2CompareMasked(t, simd.Uint64x2.LessMasked, lessSlice[uint64])
-               testUint64x4CompareMasked(t, simd.Uint64x4.LessMasked, lessSlice[uint64])
-               testUint8x16CompareMasked(t, simd.Uint8x16.LessMasked, lessSlice[uint8])
-               testUint8x32CompareMasked(t, simd.Uint8x32.LessMasked, lessSlice[uint8])
-
-               testFloat32x16CompareMasked(t, simd.Float32x16.LessMasked, lessSlice[float32])
-               testFloat64x8CompareMasked(t, simd.Float64x8.LessMasked, lessSlice[float64])
-               testInt8x64CompareMasked(t, simd.Int8x64.LessMasked, lessSlice[int8])
-               testInt16x32CompareMasked(t, simd.Int16x32.LessMasked, lessSlice[int16])
-               testInt32x16CompareMasked(t, simd.Int32x16.LessMasked, lessSlice[int32])
-               testInt64x8CompareMasked(t, simd.Int64x8.LessMasked, lessSlice[int64])
-               testUint8x64CompareMasked(t, simd.Uint8x64.LessMasked, lessSlice[uint8])
-               testUint16x32CompareMasked(t, simd.Uint16x32.LessMasked, lessSlice[uint16])
-               testUint32x16CompareMasked(t, simd.Uint32x16.LessMasked, lessSlice[uint32])
-               testUint64x8CompareMasked(t, simd.Uint64x8.LessMasked, lessSlice[uint64])
-       }
-}
-
 func TestLess(t *testing.T) {
        testFloat32x4Compare(t, simd.Float32x4.Less, lessSlice[float32])
        testFloat32x8Compare(t, simd.Float32x8.Less, lessSlice[float32])
index d6fcd065bbbad0776f5523be585e21e31d0b058b..76bbf738cb173625b395cd1cbe5c83fe972f1216 100644 (file)
@@ -66,92 +66,6 @@ func (x Int64x4) Abs() Int64x4
 // Asm: VPABSQ, CPU Feature: AVX512
 func (x Int64x8) Abs() Int64x8
 
-/* AbsMasked */
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSB, CPU Feature: AVX512
-func (x Int8x16) AbsMasked(mask Mask8x16) Int8x16
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSB, CPU Feature: AVX512
-func (x Int8x32) AbsMasked(mask Mask8x32) Int8x32
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSB, CPU Feature: AVX512
-func (x Int8x64) AbsMasked(mask Mask8x64) Int8x64
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSW, CPU Feature: AVX512
-func (x Int16x8) AbsMasked(mask Mask16x8) Int16x8
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSW, CPU Feature: AVX512
-func (x Int16x16) AbsMasked(mask Mask16x16) Int16x16
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSW, CPU Feature: AVX512
-func (x Int16x32) AbsMasked(mask Mask16x32) Int16x32
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSD, CPU Feature: AVX512
-func (x Int32x4) AbsMasked(mask Mask32x4) Int32x4
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSD, CPU Feature: AVX512
-func (x Int32x8) AbsMasked(mask Mask32x8) Int32x8
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSD, CPU Feature: AVX512
-func (x Int32x16) AbsMasked(mask Mask32x16) Int32x16
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSQ, CPU Feature: AVX512
-func (x Int64x2) AbsMasked(mask Mask64x2) Int64x2
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSQ, CPU Feature: AVX512
-func (x Int64x4) AbsMasked(mask Mask64x4) Int64x4
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSQ, CPU Feature: AVX512
-func (x Int64x8) AbsMasked(mask Mask64x8) Int64x8
-
 /* Add */
 
 // Add adds corresponding elements of two vectors.
@@ -321,29 +235,6 @@ func (x Int32x8) AddDotProdPairsSaturated(y Int16x16, z Int16x16) Int32x8
 // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
 func (x Int32x16) AddDotProdPairsSaturated(y Int16x32, z Int16x32) Int32x16
 
-/* AddDotProdPairsSaturatedMasked */
-
-// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) AddDotProdPairsSaturatedMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
-
-// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) AddDotProdPairsSaturatedMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
-
-// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) AddDotProdPairsSaturatedMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
-
 /* AddDotProdQuadruple */
 
 // AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
@@ -361,29 +252,6 @@ func (x Int8x32) AddDotProdQuadruple(y Uint8x32, z Int32x8) Int32x8
 // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Int8x64) AddDotProdQuadruple(y Uint8x64, z Int32x16) Int32x16
 
-/* AddDotProdQuadrupleMasked */
-
-// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x16) AddDotProdQuadrupleMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
-
-// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x32) AddDotProdQuadrupleMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
-
-// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) AddDotProdQuadrupleMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
-
 /* AddDotProdQuadrupleSaturated */
 
 // AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z.
@@ -401,377 +269,142 @@ func (x Int8x32) AddDotProdQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8
 // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Int8x64) AddDotProdQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16
 
-/* AddDotProdQuadrupleSaturatedMasked */
+/* AddPairs */
 
-// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x16) AddDotProdQuadrupleSaturatedMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x4) AddPairs(y Float32x4) Float32x4
 
-// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x32) AddDotProdQuadrupleSaturatedMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x8) AddPairs(y Float32x8) Float32x8
 
-// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) AddDotProdQuadrupleSaturatedMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
-
-/* AddMasked */
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x2) AddPairs(y Float64x2) Float64x2
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPS, CPU Feature: AVX512
-func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x4) AddPairs(y Float64x4) Float64x4
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPS, CPU Feature: AVX512
-func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Int16x8) AddPairs(y Int16x8) Int16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPS, CPU Feature: AVX512
-func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Int16x16) AddPairs(y Int16x16) Int16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPD, CPU Feature: AVX512
-func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Int32x4) AddPairs(y Int32x4) Int32x4
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPD, CPU Feature: AVX512
-func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Int32x8) AddPairs(y Int32x8) Int32x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPD, CPU Feature: AVX512
-func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8
+/* AddPairsSaturated */
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16
+// Asm: VPHADDSW, CPU Feature: AVX
+func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32
+// Asm: VPHADDSW, CPU Feature: AVX2
+func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4
+/* AddSaturated */
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Int8x16) AddSaturated(y Int8x16) Int8x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Int8x32) AddSaturated(y Int8x32) Int8x32
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2
+// Asm: VPADDSB, CPU Feature: AVX512
+func (x Int8x64) AddSaturated(y Int8x64) Int8x64
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Int16x8) AddSaturated(y Int16x8) Int16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Int16x16) AddSaturated(y Int16x16) Int16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16
+// Asm: VPADDSW, CPU Feature: AVX512
+func (x Int16x32) AddSaturated(y Int16x32) Int16x32
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32
+// Asm: VPADDUSB, CPU Feature: AVX
+func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
 
-// AddMasked adds corresponding elements of two vectors.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPADDUSB, CPU Feature: AVX2
+func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64
+// Asm: VPADDUSB, CPU Feature: AVX512
+func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* AddPairs */
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x4) AddPairs(y Float32x4) Float32x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x8) AddPairs(y Float32x8) Float32x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x2) AddPairs(y Float64x2) Float64x2
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x4) AddPairs(y Float64x4) Float64x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Int16x8) AddPairs(y Int16x8) Int16x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Int16x16) AddPairs(y Int16x16) Int16x16
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Int32x4) AddPairs(y Int32x4) Int32x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Int32x8) AddPairs(y Int32x8) Int32x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
-
-/* AddPairsSaturated */
-
-// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDSW, CPU Feature: AVX
-func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
-
-// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDSW, CPU Feature: AVX2
-func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
-
-/* AddSaturated */
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX
-func (x Int8x16) AddSaturated(y Int8x16) Int8x16
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX2
-func (x Int8x32) AddSaturated(y Int8x32) Int8x32
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x64) AddSaturated(y Int8x64) Int8x64
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX
-func (x Int16x8) AddSaturated(y Int16x8) Int16x8
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX2
-func (x Int16x16) AddSaturated(y Int16x16) Int16x16
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x32) AddSaturated(y Int16x32) Int16x32
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDUSB, CPU Feature: AVX
-func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDUSB, CPU Feature: AVX2
-func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
 // Asm: VPADDUSW, CPU Feature: AVX
 func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8
@@ -786,92 +419,6 @@ func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16
 // Asm: VPADDUSW, CPU Feature: AVX512
 func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32
 
-/* AddSaturatedMasked */
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x16) AddSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x32) AddSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x64) AddSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x8) AddSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x16) AddSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x32) AddSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x16) AddSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x32) AddSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x64) AddSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSW, CPU Feature: AVX512
-func (x Uint16x8) AddSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSW, CPU Feature: AVX512
-func (x Uint16x16) AddSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSW, CPU Feature: AVX512
-func (x Uint16x32) AddSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
 /* AddSub */
 
 // AddSub subtracts even elements and adds odd elements of two vectors.
@@ -1016,105 +563,19 @@ func (x Uint64x4) And(y Uint64x4) Uint64x4
 // Asm: VPANDQ, CPU Feature: AVX512
 func (x Uint64x8) And(y Uint64x8) Uint64x8
 
-/* AndMasked */
+/* AndNot */
 
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AndNot performs a bitwise x &^ y.
 //
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int8x16) AndNot(y Int8x16) Int8x16
 
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AndNot performs a bitwise x &^ y.
 //
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int8x32) AndNot(y Int8x32) Int8x32
 
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* AndNot */
-
-// AndNot performs a bitwise x &^ y.
-//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Int8x16) AndNot(y Int8x16) Int8x16
-
-// AndNot performs a bitwise x &^ y.
-//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Int8x32) AndNot(y Int8x32) Int8x32
-
-// AndNot performs a bitwise x &^ y.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512
 func (x Int8x64) AndNot(y Int8x64) Int8x64
@@ -1224,92 +685,6 @@ func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
 // Asm: VPANDNQ, CPU Feature: AVX512
 func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
 
-/* AndNotMasked */
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* Average */
 
 // Average computes the rounded average of corresponding elements.
@@ -1342,50 +717,6 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16
 // Asm: VPAVGW, CPU Feature: AVX512
 func (x Uint16x32) Average(y Uint16x32) Uint16x32
 
-/* AverageMasked */
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGB, CPU Feature: AVX512
-func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGB, CPU Feature: AVX512
-func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGB, CPU Feature: AVX512
-func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGW, CPU Feature: AVX512
-func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGW, CPU Feature: AVX512
-func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGW, CPU Feature: AVX512
-func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
 /* Broadcast128 */
 
 // Broadcast128 copies element zero of its (128-bit) input to all elements of
@@ -1448,88 +779,6 @@ func (x Uint32x4) Broadcast128() Uint32x4
 // Asm: VPBROADCASTQ, CPU Feature: AVX2
 func (x Uint64x2) Broadcast128() Uint64x2
 
-/* Broadcast128Masked */
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast128Masked(mask Mask32x4) Float32x4
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Float64x2) Broadcast128Masked(mask Mask64x2) Float64x2
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast128Masked(mask Mask8x16) Int8x16
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast128Masked(mask Mask16x8) Int16x8
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast128Masked(mask Mask32x4) Int32x4
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast128Masked(mask Mask64x2) Int64x2
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast128Masked(mask Mask8x16) Uint8x16
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast128Masked(mask Mask16x8) Uint16x8
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast128Masked(mask Mask32x4) Uint32x4
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast128Masked(mask Mask64x2) Uint64x2
-
 /* Broadcast256 */
 
 // Broadcast256 copies element zero of its (128-bit) input to all elements of
@@ -1592,128 +841,46 @@ func (x Uint32x4) Broadcast256() Uint32x8
 // Asm: VPBROADCASTQ, CPU Feature: AVX2
 func (x Uint64x2) Broadcast256() Uint64x4
 
-/* Broadcast256Masked */
+/* Broadcast512 */
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast256Masked(mask Mask32x4) Float32x8
+func (x Float32x4) Broadcast512() Float32x16
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast256Masked(mask Mask64x2) Float64x4
+func (x Float64x2) Broadcast512() Float64x8
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast256Masked(mask Mask8x16) Int8x32
+func (x Int8x16) Broadcast512() Int8x64
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast256Masked(mask Mask16x8) Int16x16
+func (x Int16x8) Broadcast512() Int16x32
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast256Masked(mask Mask32x4) Int32x8
+func (x Int32x4) Broadcast512() Int32x16
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast256Masked(mask Mask64x2) Int64x4
+func (x Int64x2) Broadcast512() Int64x8
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast256Masked(mask Mask8x16) Uint8x32
-
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast256Masked(mask Mask16x8) Uint16x16
-
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast256Masked(mask Mask32x4) Uint32x8
-
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast256Masked(mask Mask64x2) Uint64x4
-
-/* Broadcast512 */
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast512() Float32x16
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast512() Float64x8
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast512() Int8x64
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast512() Int16x32
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast512() Int32x16
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast512() Int64x8
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTB, CPU Feature: AVX512
 func (x Uint8x16) Broadcast512() Uint8x64
@@ -1736,88 +903,6 @@ func (x Uint32x4) Broadcast512() Uint32x16
 // Asm: VPBROADCASTQ, CPU Feature: AVX512
 func (x Uint64x2) Broadcast512() Uint64x8
 
-/* Broadcast512Masked */
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast512Masked(mask Mask32x4) Float32x16
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast512Masked(mask Mask64x2) Float64x8
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast512Masked(mask Mask8x16) Int8x64
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast512Masked(mask Mask16x8) Int16x32
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast512Masked(mask Mask32x4) Int32x16
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast512Masked(mask Mask64x2) Int64x8
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast512Masked(mask Mask8x16) Uint8x64
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast512Masked(mask Mask16x8) Uint16x32
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast512Masked(mask Mask32x4) Uint32x16
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast512Masked(mask Mask64x2) Uint64x8
-
 /* Ceil */
 
 // Ceil rounds elements up to the nearest integer.
@@ -1884,62 +969,6 @@ func (x Float64x4) CeilScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) CeilScaled(prec uint8) Float64x8
 
-/* CeilScaledMasked */
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) CeilScaledMasked(prec uint8, mask Mask32x4) Float32x4
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) CeilScaledMasked(prec uint8, mask Mask32x8) Float32x8
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) CeilScaledMasked(prec uint8, mask Mask32x16) Float32x16
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) CeilScaledMasked(prec uint8, mask Mask64x2) Float64x2
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) CeilScaledMasked(prec uint8, mask Mask64x4) Float64x4
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) CeilScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* CeilScaledResidue */
 
 // CeilScaledResidue computes the difference after ceiling with specified precision.
@@ -1984,62 +1013,6 @@ func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8
 
-/* CeilScaledResidueMasked */
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) CeilScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) CeilScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) CeilScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) CeilScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) CeilScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) CeilScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* Compress */
 
 // Compress performs a compression on vector x using mask by
@@ -2239,29 +1212,6 @@ func (x Float32x8) ConvertToInt32() Int32x8
 // Asm: VCVTTPS2DQ, CPU Feature: AVX512
 func (x Float32x16) ConvertToInt32() Int32x16
 
-/* ConvertToInt32Masked */
-
-// ConvertToInt32 converts element values to int32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTTPS2DQ, CPU Feature: AVX512
-func (x Float32x4) ConvertToInt32Masked(mask Mask32x4) Int32x4
-
-// ConvertToInt32 converts element values to int32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTTPS2DQ, CPU Feature: AVX512
-func (x Float32x8) ConvertToInt32Masked(mask Mask32x8) Int32x8
-
-// ConvertToInt32 converts element values to int32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTTPS2DQ, CPU Feature: AVX512
-func (x Float32x16) ConvertToInt32Masked(mask Mask32x16) Int32x16
-
 /* ConvertToUint32 */
 
 // ConvertToUint32Masked converts element values to uint32.
@@ -2279,29 +1229,6 @@ func (x Float32x8) ConvertToUint32() Uint32x8
 // Asm: VCVTPS2UDQ, CPU Feature: AVX512
 func (x Float32x16) ConvertToUint32() Uint32x16
 
-/* ConvertToUint32Masked */
-
-// ConvertToUint32Masked converts element values to uint32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTPS2UDQ, CPU Feature: AVX512
-func (x Float32x4) ConvertToUint32Masked(mask Mask32x4) Uint32x4
-
-// ConvertToUint32Masked converts element values to uint32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTPS2UDQ, CPU Feature: AVX512
-func (x Float32x8) ConvertToUint32Masked(mask Mask32x8) Uint32x8
-
-// ConvertToUint32Masked converts element values to uint32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTPS2UDQ, CPU Feature: AVX512
-func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16
-
 /* CopySign */
 
 // CopySign returns the product of the first operand with -1, 0, or 1,
@@ -2372,57 +1299,13 @@ func (x Float64x4) Div(y Float64x4) Float64x4
 // Asm: VDIVPD, CPU Feature: AVX512
 func (x Float64x8) Div(y Float64x8) Float64x8
 
-/* DivMasked */
+/* DotProdPairs */
 
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// DotProdPairs multiplies the elements and add the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VDIVPS, CPU Feature: AVX512
-func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPS, CPU Feature: AVX512
-func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPS, CPU Feature: AVX512
-func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPD, CPU Feature: AVX512
-func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPD, CPU Feature: AVX512
-func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPD, CPU Feature: AVX512
-func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8
-
-/* DotProdPairs */
-
-// DotProdPairs multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX
-func (x Int16x8) DotProdPairs(y Int16x8) Int32x4
+// Asm: VPMADDWD, CPU Feature: AVX
+func (x Int16x8) DotProdPairs(y Int16x8) Int32x4
 
 // DotProdPairs multiplies the elements and add the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
@@ -2436,32 +1319,6 @@ func (x Int16x16) DotProdPairs(y Int16x16) Int32x8
 // Asm: VPMADDWD, CPU Feature: AVX512
 func (x Int16x32) DotProdPairs(y Int16x32) Int32x16
 
-/* DotProdPairsMasked */
-
-// DotProdPairsMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512
-func (x Int16x8) DotProdPairsMasked(y Int16x8, mask Mask16x8) Int32x4
-
-// DotProdPairsMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512
-func (x Int16x16) DotProdPairsMasked(y Int16x16, mask Mask16x16) Int32x8
-
-// DotProdPairsMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512
-func (x Int16x32) DotProdPairsMasked(y Int16x32, mask Mask16x32) Int32x16
-
 /* DotProdPairsSaturated */
 
 // DotProdPairsSaturated multiplies the elements and add the pairs together with saturation,
@@ -2482,32 +1339,6 @@ func (x Uint8x32) DotProdPairsSaturated(y Int8x32) Int16x16
 // Asm: VPMADDUBSW, CPU Feature: AVX512
 func (x Uint8x64) DotProdPairsSaturated(y Int8x64) Int16x32
 
-/* DotProdPairsSaturatedMasked */
-
-// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512
-func (x Uint8x16) DotProdPairsSaturatedMasked(y Int8x16, mask Mask16x8) Int16x8
-
-// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512
-func (x Uint8x32) DotProdPairsSaturatedMasked(y Int8x32, mask Mask16x16) Int16x16
-
-// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512
-func (x Uint8x64) DotProdPairsSaturatedMasked(y Int8x64, mask Mask16x32) Int16x32
-
 /* Equal */
 
 // Equal compares for equality.
@@ -2660,218 +1491,6 @@ func (x Float64x4) Equal(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Equal(y Float64x8) Mask64x8
 
-/* EqualMasked */
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
-
 /* Expand */
 
 // Expand performs an expansion on a vector x whose elements are packed to lower parts.
@@ -3120,162 +1739,50 @@ func (x Float64x4) FloorScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) FloorScaled(prec uint8) Float64x8
 
-/* FloorScaledMasked */
+/* FloorScaledResidue */
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) FloorScaledMasked(prec uint8, mask Mask32x4) Float32x4
+// Asm: VREDUCEPS, CPU Feature: AVX512
+func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) FloorScaledMasked(prec uint8, mask Mask32x8) Float32x8
+// Asm: VREDUCEPS, CPU Feature: AVX512
+func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) FloorScaledMasked(prec uint8, mask Mask32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512
+func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) FloorScaledMasked(prec uint8, mask Mask64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512
+func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) FloorScaledMasked(prec uint8, mask Mask64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512
+func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) FloorScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* FloorScaledResidue */
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8
 
-/* FloorScaledResidueMasked */
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) FloorScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) FloorScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) FloorScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) FloorScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) FloorScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) FloorScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* GaloisFieldAffineTransform */
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
@@ -3343,85 +1850,6 @@ func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x3
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64
 
-/* GaloisFieldAffineTransformInverseMasked */
-
-// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
-// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16
-
-// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
-// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32
-
-// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
-// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64
-
-/* GaloisFieldAffineTransformMasked */
-
-// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16
-
-// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32
-
-// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64
-
 /* GaloisFieldMul */
 
 // GaloisFieldMul computes element-wise GF(2^8) multiplication with
@@ -3442,32 +1870,6 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
 // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
 
-/* GaloisFieldMulMasked */
-
-// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
-// reduction polynomial x^8 + x^4 + x^3 + x + 1.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
-// reduction polynomial x^8 + x^4 + x^3 + x + 1.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
-// reduction polynomial x^8 + x^4 + x^3 + x + 1.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
 /* GetElem */
 
 // GetElem retrieves a single constant-indexed element's value.
@@ -3928,4139 +2330,1489 @@ func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
 // Asm: VPCMPUQ, CPU Feature: AVX512
 func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
 
-/* GreaterEqualMasked */
+/* IsNan */
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) IsNan(y Float32x4) Mask32x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) IsNan(y Float32x8) Mask32x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
 // Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
+func (x Float32x16) IsNan(y Float32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) IsNan(y Float64x2) Mask64x2
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) IsNan(y Float64x4) Mask64x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
 // Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
+func (x Float64x8) IsNan(y Float64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
+/* Less */
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) Less(y Float32x4) Mask32x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) Less(y Float32x8) Mask32x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) Less(y Float32x16) Mask32x16
+
+// Less compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) Less(y Float64x2) Mask64x2
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) Less(y Float64x4) Mask64x4
+
+// Less compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) Less(y Float64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPB, CPU Feature: AVX512
+func (x Int8x64) Less(y Int8x64) Mask8x64
+
+// Less compares for less than.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
+func (x Int16x32) Less(y Int16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
+func (x Int32x16) Less(y Int32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VPCMPQ, CPU Feature: AVX512
+func (x Int64x8) Less(y Int64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
+// Asm: VPCMPUB, CPU Feature: AVX512
+func (x Uint8x64) Less(y Uint8x64) Mask8x64
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
+// Asm: VPCMPUW, CPU Feature: AVX512
+func (x Uint16x32) Less(y Uint16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VPCMPUD, CPU Feature: AVX512
+func (x Uint32x16) Less(y Uint32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
+// Asm: VPCMPUQ, CPU Feature: AVX512
+func (x Uint64x8) Less(y Uint64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
+/* LessEqual */
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) LessEqual(y Float32x4) Mask32x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) LessEqual(y Float32x8) Mask32x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) LessEqual(y Float32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) LessEqual(y Float64x2) Mask64x2
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) LessEqual(y Float64x4) Mask64x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) LessEqual(y Float64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
+// Asm: VPCMPB, CPU Feature: AVX512
+func (x Int8x64) LessEqual(y Int8x64) Mask8x64
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPCMPW, CPU Feature: AVX512
+func (x Int16x32) LessEqual(y Int16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
+// LessEqual compares for less than or equal.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPD, CPU Feature: AVX512
+func (x Int32x16) LessEqual(y Int32x16) Mask32x16
+
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPCMPQ, CPU Feature: AVX512
+func (x Int64x8) LessEqual(y Int64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// LessEqual compares for less than or equal.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUB, CPU Feature: AVX512
+func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
+
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPCMPUW, CPU Feature: AVX512
+func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
+// LessEqual compares for less than or equal.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUD, CPU Feature: AVX512
+func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
+
+// LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
+func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
 
-/* GreaterMasked */
+/* Max */
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VMAXPS, CPU Feature: AVX
+func (x Float32x4) Max(y Float32x4) Float32x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VMAXPS, CPU Feature: AVX
+func (x Float32x8) Max(y Float32x8) Float32x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VMAXPS, CPU Feature: AVX512
+func (x Float32x16) Max(y Float32x16) Float32x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VMAXPD, CPU Feature: AVX
+func (x Float64x2) Max(y Float64x2) Float64x2
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VMAXPD, CPU Feature: AVX
+func (x Float64x4) Max(y Float64x4) Float64x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VMAXPD, CPU Feature: AVX512
+func (x Float64x8) Max(y Float64x8) Float64x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16
+// Asm: VPMAXSB, CPU Feature: AVX
+func (x Int8x16) Max(y Int8x16) Int8x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VPMAXSB, CPU Feature: AVX2
+func (x Int8x32) Max(y Int8x32) Int8x32
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VPMAXSB, CPU Feature: AVX512
+func (x Int8x64) Max(y Int8x64) Int8x64
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VPMAXSW, CPU Feature: AVX
+func (x Int16x8) Max(y Int16x8) Int16x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16
+// Asm: VPMAXSW, CPU Feature: AVX2
+func (x Int16x16) Max(y Int16x16) Int16x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32
+// Asm: VPMAXSW, CPU Feature: AVX512
+func (x Int16x32) Max(y Int16x32) Int16x32
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4
+// Asm: VPMAXSD, CPU Feature: AVX
+func (x Int32x4) Max(y Int32x4) Int32x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VPMAXSD, CPU Feature: AVX2
+func (x Int32x8) Max(y Int32x8) Int32x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16
+// Asm: VPMAXSD, CPU Feature: AVX512
+func (x Int32x16) Max(y Int32x16) Int32x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2
+// Asm: VPMAXSQ, CPU Feature: AVX512
+func (x Int64x2) Max(y Int64x2) Int64x2
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VPMAXSQ, CPU Feature: AVX512
+func (x Int64x4) Max(y Int64x4) Int64x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8
+// Asm: VPMAXSQ, CPU Feature: AVX512
+func (x Int64x8) Max(y Int64x8) Int64x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16
+// Asm: VPMAXUB, CPU Feature: AVX
+func (x Uint8x16) Max(y Uint8x16) Uint8x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VPMAXUB, CPU Feature: AVX2
+func (x Uint8x32) Max(y Uint8x32) Uint8x32
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VPMAXUB, CPU Feature: AVX512
+func (x Uint8x64) Max(y Uint8x64) Uint8x64
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VPMAXUW, CPU Feature: AVX
+func (x Uint16x8) Max(y Uint16x8) Uint16x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VPMAXUW, CPU Feature: AVX2
+func (x Uint16x16) Max(y Uint16x16) Uint16x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VPMAXUW, CPU Feature: AVX512
+func (x Uint16x32) Max(y Uint16x32) Uint16x32
 
-// GreaterMasked compares for greater than.
+// Max computes the maximum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMAXUD, CPU Feature: AVX
+func (x Uint32x4) Max(y Uint32x4) Uint32x4
+
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4
+// Asm: VPMAXUD, CPU Feature: AVX2
+func (x Uint32x8) Max(y Uint32x8) Uint32x8
 
-// GreaterMasked compares for greater than.
+// Max computes the maximum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8
-
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPMAXUD, CPU Feature: AVX512
+func (x Uint32x16) Max(y Uint32x16) Uint32x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPMAXUQ, CPU Feature: AVX512
+func (x Uint64x2) Max(y Uint64x2) Uint64x2
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPMAXUQ, CPU Feature: AVX512
+func (x Uint64x4) Max(y Uint64x4) Uint64x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8
+// Asm: VPMAXUQ, CPU Feature: AVX512
+func (x Uint64x8) Max(y Uint64x8) Uint64x8
 
-/* IsNan */
+/* Min */
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) IsNan(y Float32x4) Mask32x4
+// Asm: VMINPS, CPU Feature: AVX
+func (x Float32x4) Min(y Float32x4) Float32x4
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) IsNan(y Float32x8) Mask32x8
+// Asm: VMINPS, CPU Feature: AVX
+func (x Float32x8) Min(y Float32x8) Float32x8
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) IsNan(y Float32x16) Mask32x16
+// Asm: VMINPS, CPU Feature: AVX512
+func (x Float32x16) Min(y Float32x16) Float32x16
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) IsNan(y Float64x2) Mask64x2
+// Asm: VMINPD, CPU Feature: AVX
+func (x Float64x2) Min(y Float64x2) Float64x2
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) IsNan(y Float64x4) Mask64x4
+// Asm: VMINPD, CPU Feature: AVX
+func (x Float64x4) Min(y Float64x4) Float64x4
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) IsNan(y Float64x8) Mask64x8
-
-/* IsNanMasked */
+// Asm: VMINPD, CPU Feature: AVX512
+func (x Float64x8) Min(y Float64x8) Float64x8
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
-//
-// This operation is applied selectively under a write mask.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VPMINSB, CPU Feature: AVX
+func (x Int8x16) Min(y Int8x16) Int8x16
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
-//
-// This operation is applied selectively under a write mask.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VPMINSB, CPU Feature: AVX2
+func (x Int8x32) Min(y Int8x32) Int8x32
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
-//
-// This operation is applied selectively under a write mask.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VPMINSB, CPU Feature: AVX512
+func (x Int8x64) Min(y Int8x64) Int8x64
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMINSW, CPU Feature: AVX
+func (x Int16x8) Min(y Int16x8) Int16x8
+
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VPMINSW, CPU Feature: AVX2
+func (x Int16x16) Min(y Int16x16) Int16x16
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMINSW, CPU Feature: AVX512
+func (x Int16x32) Min(y Int16x32) Int16x32
+
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VPMINSD, CPU Feature: AVX
+func (x Int32x4) Min(y Int32x4) Int32x4
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMINSD, CPU Feature: AVX2
+func (x Int32x8) Min(y Int32x8) Int32x8
+
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VPMINSD, CPU Feature: AVX512
+func (x Int32x16) Min(y Int32x16) Int32x16
 
-/* Less */
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINSQ, CPU Feature: AVX512
+func (x Int64x2) Min(y Int64x2) Int64x2
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) Less(y Float32x4) Mask32x4
+// Asm: VPMINSQ, CPU Feature: AVX512
+func (x Int64x4) Min(y Int64x4) Int64x4
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) Less(y Float32x8) Mask32x8
+// Asm: VPMINSQ, CPU Feature: AVX512
+func (x Int64x8) Min(y Int64x8) Int64x8
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) Less(y Float32x16) Mask32x16
+// Asm: VPMINUB, CPU Feature: AVX
+func (x Uint8x16) Min(y Uint8x16) Uint8x16
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) Less(y Float64x2) Mask64x2
+// Asm: VPMINUB, CPU Feature: AVX2
+func (x Uint8x32) Min(y Uint8x32) Uint8x32
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) Less(y Float64x4) Mask64x4
+// Asm: VPMINUB, CPU Feature: AVX512
+func (x Uint8x64) Min(y Uint8x64) Uint8x64
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) Less(y Float64x8) Mask64x8
+// Asm: VPMINUW, CPU Feature: AVX
+func (x Uint16x8) Min(y Uint16x8) Uint16x8
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) Less(y Int8x64) Mask8x64
+// Asm: VPMINUW, CPU Feature: AVX2
+func (x Uint16x16) Min(y Uint16x16) Uint16x16
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) Less(y Int16x32) Mask16x32
+// Asm: VPMINUW, CPU Feature: AVX512
+func (x Uint16x32) Min(y Uint16x32) Uint16x32
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) Less(y Int32x16) Mask32x16
+// Asm: VPMINUD, CPU Feature: AVX
+func (x Uint32x4) Min(y Uint32x4) Uint32x4
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) Less(y Int64x8) Mask64x8
+// Asm: VPMINUD, CPU Feature: AVX2
+func (x Uint32x8) Min(y Uint32x8) Uint32x8
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) Less(y Uint8x64) Mask8x64
+// Asm: VPMINUD, CPU Feature: AVX512
+func (x Uint32x16) Min(y Uint32x16) Uint32x16
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) Less(y Uint16x32) Mask16x32
+// Asm: VPMINUQ, CPU Feature: AVX512
+func (x Uint64x2) Min(y Uint64x2) Uint64x2
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) Less(y Uint32x16) Mask32x16
+// Asm: VPMINUQ, CPU Feature: AVX512
+func (x Uint64x4) Min(y Uint64x4) Uint64x4
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) Less(y Uint64x8) Mask64x8
+// Asm: VPMINUQ, CPU Feature: AVX512
+func (x Uint64x8) Min(y Uint64x8) Uint64x8
 
-/* LessEqual */
+/* Mul */
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) LessEqual(y Float32x4) Mask32x4
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x4) Mul(y Float32x4) Float32x4
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) LessEqual(y Float32x8) Mask32x8
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x8) Mul(y Float32x8) Float32x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) LessEqual(y Float32x16) Mask32x16
+// Asm: VMULPS, CPU Feature: AVX512
+func (x Float32x16) Mul(y Float32x16) Float32x16
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) LessEqual(y Float64x2) Mask64x2
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x2) Mul(y Float64x2) Float64x2
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) LessEqual(y Float64x4) Mask64x4
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x4) Mul(y Float64x4) Float64x4
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) LessEqual(y Float64x8) Mask64x8
+// Asm: VMULPD, CPU Feature: AVX512
+func (x Float64x8) Mul(y Float64x8) Float64x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) LessEqual(y Int8x64) Mask8x64
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Int16x8) Mul(y Int16x8) Int16x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) LessEqual(y Int16x32) Mask16x32
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Int16x16) Mul(y Int16x16) Int16x16
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) LessEqual(y Int32x16) Mask32x16
+// Asm: VPMULLW, CPU Feature: AVX512
+func (x Int16x32) Mul(y Int16x32) Int16x32
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) LessEqual(y Int64x8) Mask64x8
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Int32x4) Mul(y Int32x4) Int32x4
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Int32x8) Mul(y Int32x8) Int32x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
+// Asm: VPMULLD, CPU Feature: AVX512
+func (x Int32x16) Mul(y Int32x16) Int32x16
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Int64x2) Mul(y Int64x2) Int64x2
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
-
-/* LessEqualMasked */
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Int64x4) Mul(y Int64x4) Int64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Int64x8) Mul(y Int64x8) Int64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Uint16x8) Mul(y Uint16x8) Uint16x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Uint16x16) Mul(y Uint16x16) Uint16x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VPMULLW, CPU Feature: AVX512
+func (x Uint16x32) Mul(y Uint16x32) Uint16x32
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Uint32x4) Mul(y Uint32x4) Uint32x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Uint32x8) Mul(y Uint32x8) Uint32x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
+// Asm: VPMULLD, CPU Feature: AVX512
+func (x Uint32x16) Mul(y Uint32x16) Uint32x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Uint64x2) Mul(y Uint64x2) Uint64x2
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Uint64x4) Mul(y Uint64x4) Uint64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Uint64x8) Mul(y Uint64x8) Uint64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
+/* MulAdd */
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
+// Asm: VFMADD213PS, CPU Feature: AVX512
+func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
+// Asm: VFMADD213PS, CPU Feature: AVX512
+func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VFMADD213PS, CPU Feature: AVX512
+func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
+// Asm: VFMADD213PD, CPU Feature: AVX512
+func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
+// Asm: VFMADD213PD, CPU Feature: AVX512
+func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VFMADD213PD, CPU Feature: AVX512
+func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
+/* MulAddSub */
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512
+func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512
+func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512
+func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512
+func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512
+func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512
+func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
+/* MulEvenWiden */
 
-// LessEqualMasked compares for less than or equal.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULDQ, CPU Feature: AVX
+func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
+// Asm: VPMULDQ, CPU Feature: AVX2
+func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
 
-// LessEqualMasked compares for less than or equal.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULUDQ, CPU Feature: AVX
+func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPMULUDQ, CPU Feature: AVX2
+func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
 
-// LessEqualMasked compares for less than or equal.
+/* MulHigh */
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULHW, CPU Feature: AVX
+func (x Int16x8) MulHigh(y Int16x8) Int16x8
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPMULHW, CPU Feature: AVX2
+func (x Int16x16) MulHigh(y Int16x16) Int16x16
 
-// LessEqualMasked compares for less than or equal.
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULHW, CPU Feature: AVX512
+func (x Int16x32) MulHigh(y Int16x32) Int16x32
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPMULHUW, CPU Feature: AVX
+func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
 
-// LessEqualMasked compares for less than or equal.
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULHUW, CPU Feature: AVX2
+func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
+// Asm: VPMULHUW, CPU Feature: AVX512
+func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
 
-/* LessMasked */
+/* MulSubAdd */
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512
+func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512
+func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512
+func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512
+func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2
 
-// LessMasked compares for less than.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512
+func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512
+func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16
+/* NotEqual */
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) NotEqual(y Float32x4) Mask32x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) NotEqual(y Float32x8) Mask32x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) NotEqual(y Float32x16) Mask32x16
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) NotEqual(y Float64x2) Mask64x2
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) NotEqual(y Float64x4) Mask64x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) NotEqual(y Float64x8) Mask64x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VPCMPB, CPU Feature: AVX512
+func (x Int8x64) NotEqual(y Int8x64) Mask8x64
 
-// LessMasked compares for less than.
+// NotEqual compares for inequality.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPW, CPU Feature: AVX512
+func (x Int16x32) NotEqual(y Int16x32) Mask16x32
+
+// NotEqual compares for inequality.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16
+func (x Int32x16) NotEqual(y Int32x16) Mask32x16
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2
+func (x Int64x8) NotEqual(y Int64x8) Mask64x8
 
-// LessMasked compares for less than.
+// NotEqual compares for inequality.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUB, CPU Feature: AVX512
+func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
+
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VPCMPUW, CPU Feature: AVX512
+func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
 
-// LessMasked compares for less than.
+// NotEqual compares for inequality.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUD, CPU Feature: AVX512
+func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
+
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8
+// Asm: VPCMPUQ, CPU Feature: AVX512
+func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
+
+/* OnesCount */
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x16) OnesCount() Int8x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x32) OnesCount() Int8x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x64) OnesCount() Int8x64
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x8) OnesCount() Int16x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x16) OnesCount() Int16x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x32) OnesCount() Int16x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x4) OnesCount() Int32x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x8) OnesCount() Int32x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x16) OnesCount() Int32x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x2) OnesCount() Int64x2
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x4) OnesCount() Int64x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x8) OnesCount() Int64x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x16) OnesCount() Uint8x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x32) OnesCount() Uint8x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x64) OnesCount() Uint8x64
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x8) OnesCount() Uint16x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x16) OnesCount() Uint16x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x32) OnesCount() Uint16x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x4) OnesCount() Uint32x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x8) OnesCount() Uint32x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x16) OnesCount() Uint32x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x2) OnesCount() Uint64x2
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x4) OnesCount() Uint64x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x8) OnesCount() Uint64x8
 
-/* Max */
+/* Or */
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPS, CPU Feature: AVX
-func (x Float32x4) Max(y Float32x4) Float32x4
+// Asm: VPOR, CPU Feature: AVX
+func (x Int8x16) Or(y Int8x16) Int8x16
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPS, CPU Feature: AVX
-func (x Float32x8) Max(y Float32x8) Float32x8
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int8x32) Or(y Int8x32) Int8x32
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x16) Max(y Float32x16) Float32x16
+// Asm: VPORD, CPU Feature: AVX512
+func (x Int8x64) Or(y Int8x64) Int8x64
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPD, CPU Feature: AVX
-func (x Float64x2) Max(y Float64x2) Float64x2
+// Asm: VPOR, CPU Feature: AVX
+func (x Int16x8) Or(y Int16x8) Int16x8
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPD, CPU Feature: AVX
-func (x Float64x4) Max(y Float64x4) Float64x4
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int16x16) Or(y Int16x16) Int16x16
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x8) Max(y Float64x8) Float64x8
+// Asm: VPORD, CPU Feature: AVX512
+func (x Int16x32) Or(y Int16x32) Int16x32
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSB, CPU Feature: AVX
-func (x Int8x16) Max(y Int8x16) Int8x16
+// Asm: VPOR, CPU Feature: AVX
+func (x Int32x4) Or(y Int32x4) Int32x4
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSB, CPU Feature: AVX2
-func (x Int8x32) Max(y Int8x32) Int8x32
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int32x8) Or(y Int32x8) Int32x8
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x64) Max(y Int8x64) Int8x64
+// Asm: VPORD, CPU Feature: AVX512
+func (x Int32x16) Or(y Int32x16) Int32x16
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSW, CPU Feature: AVX
-func (x Int16x8) Max(y Int16x8) Int16x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSW, CPU Feature: AVX2
-func (x Int16x16) Max(y Int16x16) Int16x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x32) Max(y Int16x32) Int16x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX
-func (x Int32x4) Max(y Int32x4) Int32x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX2
-func (x Int32x8) Max(y Int32x8) Int32x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x16) Max(y Int32x16) Int32x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x2) Max(y Int64x2) Int64x2
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x4) Max(y Int64x4) Int64x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x8) Max(y Int64x8) Int64x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUB, CPU Feature: AVX
-func (x Uint8x16) Max(y Uint8x16) Uint8x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUB, CPU Feature: AVX2
-func (x Uint8x32) Max(y Uint8x32) Uint8x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x64) Max(y Uint8x64) Uint8x64
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX
-func (x Uint16x8) Max(y Uint16x8) Uint16x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX2
-func (x Uint16x16) Max(y Uint16x16) Uint16x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x32) Max(y Uint16x32) Uint16x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX
-func (x Uint32x4) Max(y Uint32x4) Uint32x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX2
-func (x Uint32x8) Max(y Uint32x8) Uint32x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x16) Max(y Uint32x16) Uint32x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x2) Max(y Uint64x2) Uint64x2
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x4) Max(y Uint64x4) Uint64x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x8) Max(y Uint64x8) Uint64x8
-
-/* MaxMasked */
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* Min */
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPS, CPU Feature: AVX
-func (x Float32x4) Min(y Float32x4) Float32x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPS, CPU Feature: AVX
-func (x Float32x8) Min(y Float32x8) Float32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x16) Min(y Float32x16) Float32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPD, CPU Feature: AVX
-func (x Float64x2) Min(y Float64x2) Float64x2
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPD, CPU Feature: AVX
-func (x Float64x4) Min(y Float64x4) Float64x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x8) Min(y Float64x8) Float64x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSB, CPU Feature: AVX
-func (x Int8x16) Min(y Int8x16) Int8x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSB, CPU Feature: AVX2
-func (x Int8x32) Min(y Int8x32) Int8x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x64) Min(y Int8x64) Int8x64
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX
-func (x Int16x8) Min(y Int16x8) Int16x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX2
-func (x Int16x16) Min(y Int16x16) Int16x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x32) Min(y Int16x32) Int16x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX
-func (x Int32x4) Min(y Int32x4) Int32x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX2
-func (x Int32x8) Min(y Int32x8) Int32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x16) Min(y Int32x16) Int32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x2) Min(y Int64x2) Int64x2
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x4) Min(y Int64x4) Int64x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x8) Min(y Int64x8) Int64x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUB, CPU Feature: AVX
-func (x Uint8x16) Min(y Uint8x16) Uint8x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUB, CPU Feature: AVX2
-func (x Uint8x32) Min(y Uint8x32) Uint8x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x64) Min(y Uint8x64) Uint8x64
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX
-func (x Uint16x8) Min(y Uint16x8) Uint16x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX2
-func (x Uint16x16) Min(y Uint16x16) Uint16x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x32) Min(y Uint16x32) Uint16x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUD, CPU Feature: AVX
-func (x Uint32x4) Min(y Uint32x4) Uint32x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUD, CPU Feature: AVX2
-func (x Uint32x8) Min(y Uint32x8) Uint32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x16) Min(y Uint32x16) Uint32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x2) Min(y Uint64x2) Uint64x2
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x4) Min(y Uint64x4) Uint64x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x8) Min(y Uint64x8) Uint64x8
-
-/* MinMasked */
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* Mul */
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPS, CPU Feature: AVX
-func (x Float32x4) Mul(y Float32x4) Float32x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPS, CPU Feature: AVX
-func (x Float32x8) Mul(y Float32x8) Float32x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x16) Mul(y Float32x16) Float32x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPD, CPU Feature: AVX
-func (x Float64x2) Mul(y Float64x2) Float64x2
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPD, CPU Feature: AVX
-func (x Float64x4) Mul(y Float64x4) Float64x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x8) Mul(y Float64x8) Float64x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX
-func (x Int16x8) Mul(y Int16x8) Int16x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX2
-func (x Int16x16) Mul(y Int16x16) Int16x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x32) Mul(y Int16x32) Int16x32
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX
-func (x Int32x4) Mul(y Int32x4) Int32x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX2
-func (x Int32x8) Mul(y Int32x8) Int32x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x16) Mul(y Int32x16) Int32x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x2) Mul(y Int64x2) Int64x2
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x4) Mul(y Int64x4) Int64x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x8) Mul(y Int64x8) Int64x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX
-func (x Uint16x8) Mul(y Uint16x8) Uint16x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX2
-func (x Uint16x16) Mul(y Uint16x16) Uint16x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x32) Mul(y Uint16x32) Uint16x32
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX
-func (x Uint32x4) Mul(y Uint32x4) Uint32x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX2
-func (x Uint32x8) Mul(y Uint32x8) Uint32x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x16) Mul(y Uint32x16) Uint32x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x2) Mul(y Uint64x2) Uint64x2
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x4) Mul(y Uint64x4) Uint64x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x8) Mul(y Uint64x8) Uint64x8
-
-/* MulAdd */
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
-
-/* MulAddMasked */
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* MulAddSub */
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
-
-/* MulAddSubMasked */
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x4) MulAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x8) MulAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x16) MulAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x2) MulAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x4) MulAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x8) MulAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* MulEvenWiden */
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULDQ, CPU Feature: AVX
-func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULDQ, CPU Feature: AVX2
-func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULUDQ, CPU Feature: AVX
-func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULUDQ, CPU Feature: AVX2
-func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
-
-/* MulHigh */
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHW, CPU Feature: AVX
-func (x Int16x8) MulHigh(y Int16x8) Int16x8
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHW, CPU Feature: AVX2
-func (x Int16x16) MulHigh(y Int16x16) Int16x16
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x32) MulHigh(y Int16x32) Int16x32
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHUW, CPU Feature: AVX
-func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHUW, CPU Feature: AVX2
-func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
-
-/* MulHighMasked */
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-/* MulMasked */
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x8) MulMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x16) MulMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x32) MulMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x4) MulMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x8) MulMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x16) MulMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x2) MulMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x4) MulMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x8) MulMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x8) MulMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x16) MulMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x32) MulMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x4) MulMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x8) MulMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x16) MulMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x2) MulMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x4) MulMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x8) MulMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* MulSubAdd */
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
-
-/* MulSubAddMasked */
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulSubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulSubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulSubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulSubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulSubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulSubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* NotEqual */
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) NotEqual(y Float32x4) Mask32x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) NotEqual(y Float32x8) Mask32x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) NotEqual(y Float32x16) Mask32x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) NotEqual(y Float64x2) Mask64x2
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) NotEqual(y Float64x4) Mask64x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) NotEqual(y Float64x8) Mask64x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) NotEqual(y Int8x64) Mask8x64
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) NotEqual(y Int16x32) Mask16x32
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) NotEqual(y Int32x16) Mask32x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) NotEqual(y Int64x8) Mask64x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
-
-/* NotEqualMasked */
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
-
-/* OnesCount */
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) OnesCount() Int8x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) OnesCount() Int8x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) OnesCount() Int8x64
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) OnesCount() Int16x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) OnesCount() Int16x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) OnesCount() Int16x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) OnesCount() Int32x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) OnesCount() Int32x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) OnesCount() Int32x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) OnesCount() Int64x2
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) OnesCount() Int64x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) OnesCount() Int64x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) OnesCount() Uint8x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) OnesCount() Uint8x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) OnesCount() Uint8x64
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) OnesCount() Uint16x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) OnesCount() Uint16x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) OnesCount() Uint16x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) OnesCount() Uint32x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) OnesCount() Uint32x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) OnesCount() Uint32x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) OnesCount() Uint64x2
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) OnesCount() Uint64x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) OnesCount() Uint64x8
-
-/* OnesCountMasked */
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) OnesCountMasked(mask Mask8x16) Int8x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) OnesCountMasked(mask Mask8x32) Int8x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) OnesCountMasked(mask Mask8x64) Int8x64
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) OnesCountMasked(mask Mask16x8) Int16x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) OnesCountMasked(mask Mask16x16) Int16x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) OnesCountMasked(mask Mask16x32) Int16x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) OnesCountMasked(mask Mask32x4) Int32x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) OnesCountMasked(mask Mask32x8) Int32x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) OnesCountMasked(mask Mask32x16) Int32x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) OnesCountMasked(mask Mask64x2) Int64x2
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) OnesCountMasked(mask Mask64x4) Int64x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) OnesCountMasked(mask Mask64x8) Int64x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) OnesCountMasked(mask Mask8x16) Uint8x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) OnesCountMasked(mask Mask8x32) Uint8x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) OnesCountMasked(mask Mask8x64) Uint8x64
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) OnesCountMasked(mask Mask16x8) Uint16x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) OnesCountMasked(mask Mask16x16) Uint16x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) OnesCountMasked(mask Mask16x32) Uint16x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) OnesCountMasked(mask Mask32x4) Uint32x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) OnesCountMasked(mask Mask32x8) Uint32x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) OnesCountMasked(mask Mask32x16) Uint32x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) OnesCountMasked(mask Mask64x2) Uint64x2
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) OnesCountMasked(mask Mask64x4) Uint64x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) OnesCountMasked(mask Mask64x8) Uint64x8
-
-/* Or */
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int8x16) Or(y Int8x16) Int8x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int8x32) Or(y Int8x32) Int8x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int8x64) Or(y Int8x64) Int8x64
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int16x8) Or(y Int16x8) Int16x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int16x16) Or(y Int16x16) Int16x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int16x32) Or(y Int16x32) Int16x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int32x4) Or(y Int32x4) Int32x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int32x8) Or(y Int32x8) Int32x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x16) Or(y Int32x16) Int32x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int64x2) Or(y Int64x2) Int64x2
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int64x4) Or(y Int64x4) Int64x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x8) Or(y Int64x8) Int64x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint8x16) Or(y Uint8x16) Uint8x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint8x32) Or(y Uint8x32) Uint8x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint8x64) Or(y Uint8x64) Uint8x64
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint16x8) Or(y Uint16x8) Uint16x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint16x16) Or(y Uint16x16) Uint16x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint16x32) Or(y Uint16x32) Uint16x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint32x4) Or(y Uint32x4) Uint32x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint32x8) Or(y Uint32x8) Uint32x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x16) Or(y Uint32x16) Uint32x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint64x2) Or(y Uint64x2) Uint64x2
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint64x4) Or(y Uint64x4) Uint64x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x8) Or(y Uint64x8) Uint64x8
-
-/* OrMasked */
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* Permute */
-
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute(indices Uint8x16) Int8x16
-
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute(indices Uint8x16) Uint8x16
-
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute(indices Uint8x32) Int8x32
+// Asm: VPOR, CPU Feature: AVX
+func (x Int64x2) Or(y Int64x2) Int64x2
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute(indices Uint8x32) Uint8x32
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int64x4) Or(y Int64x4) Int64x4
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute(indices Uint8x64) Int8x64
+// Asm: VPORQ, CPU Feature: AVX512
+func (x Int64x8) Or(y Int64x8) Int64x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute(indices Uint8x64) Uint8x64
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint8x16) Or(y Uint8x16) Uint8x16
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x8) Permute(indices Uint16x8) Int16x8
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint8x32) Or(y Uint8x32) Uint8x32
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x8) Permute(indices Uint16x8) Uint16x8
+// Asm: VPORD, CPU Feature: AVX512
+func (x Uint8x64) Or(y Uint8x64) Uint8x64
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x16) Permute(indices Uint16x16) Int16x16
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint16x8) Or(y Uint16x8) Uint16x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x16) Permute(indices Uint16x16) Uint16x16
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint16x16) Or(y Uint16x16) Uint16x16
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x32) Permute(indices Uint16x32) Int16x32
+// Asm: VPORD, CPU Feature: AVX512
+func (x Uint16x32) Or(y Uint16x32) Uint16x32
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x32) Permute(indices Uint16x32) Uint16x32
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint32x4) Or(y Uint32x4) Uint32x4
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMPS, CPU Feature: AVX2
-func (x Float32x8) Permute(indices Uint32x8) Float32x8
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint32x8) Or(y Uint32x8) Uint32x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMD, CPU Feature: AVX2
-func (x Int32x8) Permute(indices Uint32x8) Int32x8
+// Asm: VPORD, CPU Feature: AVX512
+func (x Uint32x16) Or(y Uint32x16) Uint32x16
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMD, CPU Feature: AVX2
-func (x Uint32x8) Permute(indices Uint32x8) Uint32x8
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint64x2) Or(y Uint64x2) Uint64x2
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMPS, CPU Feature: AVX512
-func (x Float32x16) Permute(indices Uint32x16) Float32x16
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint64x4) Or(y Uint64x4) Uint64x4
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Int32x16) Permute(indices Uint32x16) Int32x16
+// Asm: VPORQ, CPU Feature: AVX512
+func (x Uint64x8) Or(y Uint64x8) Uint64x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Uint32x16) Permute(indices Uint32x16) Uint32x16
+/* Permute */
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x4) Permute(indices Uint64x4) Float64x4
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x16) Permute(indices Uint8x16) Int8x16
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x4) Permute(indices Uint64x4) Int64x4
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x16) Permute(indices Uint8x16) Uint8x16
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x4) Permute(indices Uint64x4) Uint64x4
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x32) Permute(indices Uint8x32) Int8x32
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x8) Permute(indices Uint64x8) Float64x8
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x32) Permute(indices Uint8x32) Uint8x32
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x8) Permute(indices Uint64x8) Int64x8
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x64) Permute(indices Uint8x64) Int8x64
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
-
-/* Permute2 */
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute2(y Int8x16, indices Uint8x16) Int8x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute2(y Uint8x16, indices Uint8x16) Uint8x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute2(y Int8x32, indices Uint8x32) Int8x32
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute2(y Uint8x32, indices Uint8x32) Uint8x32
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute2(y Int8x64, indices Uint8x64) Int8x64
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute2(y Uint8x64, indices Uint8x64) Uint8x64
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x8) Permute2(y Int16x8, indices Uint16x8) Int16x8
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x8) Permute2(y Uint16x8, indices Uint16x8) Uint16x8
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x16) Permute2(y Int16x16, indices Uint16x16) Int16x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x16) Permute2(y Uint16x16, indices Uint16x16) Uint16x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x32) Permute2(y Int16x32, indices Uint16x32) Int16x32
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x32) Permute2(y Uint16x32, indices Uint16x32) Uint16x32
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x64) Permute(indices Uint8x64) Uint8x64
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x4) Permute2(y Float32x4, indices Uint32x4) Float32x4
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Int16x8) Permute(indices Uint16x8) Int16x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x4) Permute2(y Int32x4, indices Uint32x4) Int32x4
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Uint16x8) Permute(indices Uint16x8) Uint16x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x4) Permute2(y Uint32x4, indices Uint32x4) Uint32x4
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Int16x16) Permute(indices Uint16x16) Int16x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x8) Permute2(y Float32x8, indices Uint32x8) Float32x8
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Uint16x16) Permute(indices Uint16x16) Uint16x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x8) Permute2(y Int32x8, indices Uint32x8) Int32x8
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Int16x32) Permute(indices Uint16x32) Int16x32
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x8) Permute2(y Uint32x8, indices Uint32x8) Uint32x8
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Uint16x32) Permute(indices Uint16x32) Uint16x32
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x16) Permute2(y Float32x16, indices Uint32x16) Float32x16
+// Asm: VPERMPS, CPU Feature: AVX2
+func (x Float32x8) Permute(indices Uint32x8) Float32x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x16) Permute2(y Int32x16, indices Uint32x16) Int32x16
+// Asm: VPERMD, CPU Feature: AVX2
+func (x Int32x8) Permute(indices Uint32x8) Int32x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x16) Permute2(y Uint32x16, indices Uint32x16) Uint32x16
+// Asm: VPERMD, CPU Feature: AVX2
+func (x Uint32x8) Permute(indices Uint32x8) Uint32x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x2) Permute2(y Float64x2, indices Uint64x2) Float64x2
+// Asm: VPERMPS, CPU Feature: AVX512
+func (x Float32x16) Permute(indices Uint32x16) Float32x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x2) Permute2(y Int64x2, indices Uint64x2) Int64x2
+// Asm: VPERMD, CPU Feature: AVX512
+func (x Int32x16) Permute(indices Uint32x16) Int32x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x2) Permute2(y Uint64x2, indices Uint64x2) Uint64x2
+// Asm: VPERMD, CPU Feature: AVX512
+func (x Uint32x16) Permute(indices Uint32x16) Uint32x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x4) Permute2(y Float64x4, indices Uint64x4) Float64x4
+// Asm: VPERMPD, CPU Feature: AVX512
+func (x Float64x4) Permute(indices Uint64x4) Float64x4
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x4) Permute2(y Int64x4, indices Uint64x4) Int64x4
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Int64x4) Permute(indices Uint64x4) Int64x4
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x4) Permute2(y Uint64x4, indices Uint64x4) Uint64x4
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Uint64x4) Permute(indices Uint64x4) Uint64x4
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x8) Permute2(y Float64x8, indices Uint64x8) Float64x8
+// Asm: VPERMPD, CPU Feature: AVX512
+func (x Float64x8) Permute(indices Uint64x8) Float64x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Int64x8) Permute(indices Uint64x8) Int64x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
 
-/* Permute2Masked */
+/* Permute2 */
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16
+func (x Int8x16) Permute2(y Int8x16, indices Uint8x16) Int8x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16
+func (x Uint8x16) Permute2(y Uint8x16, indices Uint8x16) Uint8x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32
+func (x Int8x32) Permute2(y Int8x32, indices Uint8x32) Int8x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32
+func (x Uint8x32) Permute2(y Uint8x32, indices Uint8x32) Uint8x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64
+func (x Int8x64) Permute2(y Int8x64, indices Uint8x64) Int8x64
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64
-
-// Permute2Masked performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8
-
-// Permute2Masked performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8
-
-// Permute2Masked performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16
+func (x Uint8x64) Permute2(y Uint8x64, indices Uint8x64) Uint8x64
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
 // Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16
+func (x Int16x8) Permute2(y Int16x8, indices Uint16x8) Int16x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32
+func (x Uint16x8) Permute2(y Uint16x8, indices Uint16x8) Uint16x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32
+func (x Int16x16) Permute2(y Int16x16, indices Uint16x16) Int16x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4
+// Asm: VPERMI2W, CPU Feature: AVX512
+func (x Uint16x16) Permute2(y Uint16x16, indices Uint16x16) Uint16x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4
+// Asm: VPERMI2W, CPU Feature: AVX512
+func (x Int16x32) Permute2(y Int16x32, indices Uint16x32) Int16x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4
+// Asm: VPERMI2W, CPU Feature: AVX512
+func (x Uint16x32) Permute2(y Uint16x32, indices Uint16x32) Uint16x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8
+func (x Float32x4) Permute2(y Float32x4, indices Uint32x4) Float32x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8
+func (x Int32x4) Permute2(y Int32x4, indices Uint32x4) Int32x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8
+func (x Uint32x4) Permute2(y Uint32x4, indices Uint32x4) Uint32x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16
+func (x Float32x8) Permute2(y Float32x8, indices Uint32x8) Float32x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16
+func (x Int32x8) Permute2(y Int32x8, indices Uint32x8) Int32x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16
+func (x Uint32x8) Permute2(y Uint32x8, indices Uint32x8) Uint32x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2
+// Asm: VPERMI2PS, CPU Feature: AVX512
+func (x Float32x16) Permute2(y Float32x16, indices Uint32x16) Float32x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2
+// Asm: VPERMI2D, CPU Feature: AVX512
+func (x Int32x16) Permute2(y Int32x16, indices Uint32x16) Int32x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPERMI2D, CPU Feature: AVX512
+func (x Uint32x16) Permute2(y Uint32x16, indices Uint32x16) Uint32x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4
+func (x Float64x2) Permute2(y Float64x2, indices Uint64x2) Float64x2
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4
+func (x Int64x2) Permute2(y Int64x2, indices Uint64x2) Int64x2
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4
+func (x Uint64x2) Permute2(y Uint64x2, indices Uint64x2) Uint64x2
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8
+func (x Float64x4) Permute2(y Float64x4, indices Uint64x4) Float64x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8
+func (x Int64x4) Permute2(y Int64x4, indices Uint64x4) Int64x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8
-
-/* PermuteMasked */
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPS, CPU Feature: AVX512
-func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPS, CPU Feature: AVX512
-func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8
+func (x Uint64x4) Permute2(y Uint64x4, indices Uint64x4) Uint64x4
 
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
+// Permute2 performs a full permutation of vector x, y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is x appending y.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8
+// Asm: VPERMI2PD, CPU Feature: AVX512
+func (x Float64x8) Permute2(y Float64x8, indices Uint64x8) Float64x8
 
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Permute2 performs a full permutation of vector x, y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is x appending y.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPERMI2Q, CPU Feature: AVX512
+func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8
+
+// Permute2 performs a full permutation of vector x, y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is x appending y.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8
+// Asm: VPERMI2Q, CPU Feature: AVX512
+func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8
 
 /* Reciprocal */
 
@@ -8094,50 +3846,6 @@ func (x Float64x4) Reciprocal() Float64x4
 // Asm: VRCP14PD, CPU Feature: AVX512
 func (x Float64x8) Reciprocal() Float64x8
 
-/* ReciprocalMasked */
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512
-func (x Float32x4) ReciprocalMasked(mask Mask32x4) Float32x4
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512
-func (x Float32x8) ReciprocalMasked(mask Mask32x8) Float32x8
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512
-func (x Float32x16) ReciprocalMasked(mask Mask32x16) Float32x16
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512
-func (x Float64x2) ReciprocalMasked(mask Mask64x2) Float64x2
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512
-func (x Float64x4) ReciprocalMasked(mask Mask64x4) Float64x4
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512
-func (x Float64x8) ReciprocalMasked(mask Mask64x8) Float64x8
-
 /* ReciprocalSqrt */
 
 // ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
@@ -8170,50 +3878,6 @@ func (x Float64x4) ReciprocalSqrt() Float64x4
 // Asm: VRSQRT14PD, CPU Feature: AVX512
 func (x Float64x8) ReciprocalSqrt() Float64x8
 
-/* ReciprocalSqrtMasked */
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512
-func (x Float32x4) ReciprocalSqrtMasked(mask Mask32x4) Float32x4
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512
-func (x Float32x8) ReciprocalSqrtMasked(mask Mask32x8) Float32x8
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512
-func (x Float32x16) ReciprocalSqrtMasked(mask Mask32x16) Float32x16
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512
-func (x Float64x2) ReciprocalSqrtMasked(mask Mask64x2) Float64x2
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512
-func (x Float64x4) ReciprocalSqrtMasked(mask Mask64x4) Float64x4
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512
-func (x Float64x8) ReciprocalSqrtMasked(mask Mask64x8) Float64x8
-
 /* RotateAllLeft */
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
@@ -8300,116 +3964,6 @@ func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4
 // Asm: VPROLQ, CPU Feature: AVX512
 func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
 
-/* RotateAllLeftMasked */
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8
-
 /* RotateAllRight */
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
@@ -8467,144 +4021,34 @@ func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4
 //
 // Asm: VPRORD, CPU Feature: AVX512
 func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
-
-/* RotateAllRightMasked */
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16
+func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
 
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2
+func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
 
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4
+func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
 
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8
+func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
 
 /* RotateLeft */
 
@@ -8668,92 +4112,6 @@ func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4
 // Asm: VPROLVQ, CPU Feature: AVX512
 func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8
 
-/* RotateLeftMasked */
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* RotateRight */
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
@@ -8816,92 +4174,6 @@ func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4
 // Asm: VPRORVQ, CPU Feature: AVX512
 func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8
 
-/* RotateRightMasked */
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* RoundToEven */
 
 // RoundToEven rounds elements to the nearest integer.
@@ -8968,62 +4240,6 @@ func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8
 
-/* RoundToEvenScaledMasked */
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) RoundToEvenScaledMasked(prec uint8, mask Mask32x4) Float32x4
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) RoundToEvenScaledMasked(prec uint8, mask Mask32x8) Float32x8
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) RoundToEvenScaledMasked(prec uint8, mask Mask32x16) Float32x16
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) RoundToEvenScaledMasked(prec uint8, mask Mask64x2) Float64x2
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) RoundToEvenScaledMasked(prec uint8, mask Mask64x4) Float64x4
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) RoundToEvenScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* RoundToEvenScaledResidue */
 
 // RoundToEvenScaledResidue computes the difference after rounding with specified precision.
@@ -9068,62 +4284,6 @@ func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8
 
-/* RoundToEvenScaledResidueMasked */
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* Scale */
 
 // Scale multiplies elements by a power of 2.
@@ -9131,74 +4291,30 @@ func (x Float64x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x8) Flo
 // Asm: VSCALEFPS, CPU Feature: AVX512
 func (x Float32x4) Scale(y Float32x4) Float32x4
 
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x8) Scale(y Float32x8) Float32x8
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x16) Scale(y Float32x16) Float32x16
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x2) Scale(y Float64x2) Float64x2
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x4) Scale(y Float64x4) Float64x4
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x8) Scale(y Float64x8) Float64x8
-
-/* ScaleMasked */
-
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x4) ScaleMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x8) ScaleMasked(y Float32x8, mask Mask32x8) Float32x8
+func (x Float32x8) Scale(y Float32x8) Float32x8
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x16) ScaleMasked(y Float32x16, mask Mask32x16) Float32x16
+func (x Float32x16) Scale(y Float32x16) Float32x16
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x2) ScaleMasked(y Float64x2, mask Mask64x2) Float64x2
+func (x Float64x2) Scale(y Float64x2) Float64x2
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x4) ScaleMasked(y Float64x4, mask Mask64x4) Float64x4
+func (x Float64x4) Scale(y Float64x4) Float64x4
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8
+func (x Float64x8) Scale(y Float64x8) Float64x8
 
 /* SetElem */
 
@@ -9709,320 +4825,10 @@ func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4
 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
 // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
 //
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8
-
-/* ShiftAllLeftConcatMasked */
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllLeftConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllLeftConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllLeftConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllLeftConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllLeftConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllLeftConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllLeftConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllLeftConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllLeftConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllLeftConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllLeftConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllLeftConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllLeftConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllLeftConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllLeftConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllLeftConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllLeftConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllLeftConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftAllLeftMasked */
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8
 
 /* ShiftAllRight */
 
@@ -10217,360 +5023,50 @@ func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32
 // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
 // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
-
-/* ShiftAllRightConcatMasked */
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllRightConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllRightConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllRightConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllRightConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllRightConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllRightConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllRightConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllRightConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllRightConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllRightConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllRightConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllRightConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllRightConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllRightConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllRightConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllRightConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllRightConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllRightConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftAllRightMasked */
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAW, CPU Feature: AVX512
-func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAW, CPU Feature: AVX512
-func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAW, CPU Feature: AVX512
-func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAD, CPU Feature: AVX512
-func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAD, CPU Feature: AVX512
-func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAD, CPU Feature: AVX512
-func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512
-func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512
-func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512
-func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLW, CPU Feature: AVX512
-func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLW, CPU Feature: AVX512
-func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLW, CPU Feature: AVX512
-func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLD, CPU Feature: AVX512
-func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
+func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLD, CPU Feature: AVX512
-func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
+func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLD, CPU Feature: AVX512
-func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
+func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
 
 /* ShiftLeft */
 
@@ -10742,311 +5238,37 @@ func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
 // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
-
-/* ShiftLeftConcatMasked */
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftLeftConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftLeftConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftLeftConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftLeftConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftLeftConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftLeftConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftLeftConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftLeftConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftLeftConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftLeftConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftLeftConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftLeftConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftLeftConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftLeftConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftLeftConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftLeftConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftLeftConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftLeftConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftLeftMasked */
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
+func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4
 
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8
 
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2
 
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
 
 /* ShiftRight */
 
@@ -11187,342 +5409,68 @@ func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2
 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
-
-/* ShiftRightConcatMasked */
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftRightConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftRightConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftRightConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftRightConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftRightConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftRightConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftRightConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftRightConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftRightConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftRightConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftRightConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftRightConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftRightConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftRightConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftRightConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftRightConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftRightConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftRightConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftRightMasked */
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512
-func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512
-func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512
-func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512
-func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512
-func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512
-func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512
-func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512
-func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512
-func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLVW, CPU Feature: AVX512
-func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLVW, CPU Feature: AVX512
-func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512
-func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512
-func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
+func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512
-func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
+func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512
-func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
+func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4
+
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
 
 /* Sqrt */
 
@@ -11556,50 +5504,6 @@ func (x Float64x4) Sqrt() Float64x4
 // Asm: VSQRTPD, CPU Feature: AVX512
 func (x Float64x8) Sqrt() Float64x8
 
-/* SqrtMasked */
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPS, CPU Feature: AVX512
-func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPS, CPU Feature: AVX512
-func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPS, CPU Feature: AVX512
-func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512
-func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512
-func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512
-func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8
-
 /* Sub */
 
 // Sub subtracts corresponding elements of two vectors.
@@ -11702,267 +5606,55 @@ func (x Uint8x16) Sub(y Uint8x16) Uint8x16
 // Asm: VPSUBB, CPU Feature: AVX2
 func (x Uint8x32) Sub(y Uint8x32) Uint8x32
 
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x64) Sub(y Uint8x64) Uint8x64
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX
-func (x Uint16x8) Sub(y Uint16x8) Uint16x8
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX2
-func (x Uint16x16) Sub(y Uint16x16) Uint16x16
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x32) Sub(y Uint16x32) Uint16x32
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX
-func (x Uint32x4) Sub(y Uint32x4) Uint32x4
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX2
-func (x Uint32x8) Sub(y Uint32x8) Uint32x8
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x16) Sub(y Uint32x16) Uint32x16
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBQ, CPU Feature: AVX
-func (x Uint64x2) Sub(y Uint64x2) Uint64x2
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBQ, CPU Feature: AVX2
-func (x Uint64x4) Sub(y Uint64x4) Uint64x4
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x8) Sub(y Uint64x8) Uint64x8
-
-/* SubMasked */
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPS, CPU Feature: AVX512
-func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPS, CPU Feature: AVX512
-func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPS, CPU Feature: AVX512
-func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPD, CPU Feature: AVX512
-func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPD, CPU Feature: AVX512
-func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPD, CPU Feature: AVX512
-func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// SubMasked subtracts corresponding elements of two vectors.
+// Sub subtracts corresponding elements of two vectors.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSUBB, CPU Feature: AVX512
+func (x Uint8x64) Sub(y Uint8x64) Uint8x64
+
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16
+// Asm: VPSUBW, CPU Feature: AVX
+func (x Uint16x8) Sub(y Uint16x8) Uint16x8
 
-// SubMasked subtracts corresponding elements of two vectors.
+// Sub subtracts corresponding elements of two vectors.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSUBW, CPU Feature: AVX2
+func (x Uint16x16) Sub(y Uint16x16) Uint16x16
+
+// Sub subtracts corresponding elements of two vectors.
 //
 // Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32
+func (x Uint16x32) Sub(y Uint16x32) Uint16x32
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4
+// Asm: VPSUBD, CPU Feature: AVX
+func (x Uint32x4) Sub(y Uint32x4) Uint32x4
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8
+// Asm: VPSUBD, CPU Feature: AVX2
+func (x Uint32x8) Sub(y Uint32x8) Uint32x8
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
 // Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16
+func (x Uint32x16) Sub(y Uint32x16) Uint32x16
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPSUBQ, CPU Feature: AVX
+func (x Uint64x2) Sub(y Uint64x2) Uint64x2
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4
+// Asm: VPSUBQ, CPU Feature: AVX2
+func (x Uint64x4) Sub(y Uint64x4) Uint64x4
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
 // Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8
+func (x Uint64x8) Sub(y Uint64x8) Uint64x8
 
 /* SubPairs */
 
@@ -12114,92 +5806,6 @@ func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16
 // Asm: VPSUBUSW, CPU Feature: AVX512
 func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32
 
-/* SubSaturatedMasked */
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSB, CPU Feature: AVX512
-func (x Int8x16) SubSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSB, CPU Feature: AVX512
-func (x Int8x32) SubSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSB, CPU Feature: AVX512
-func (x Int8x64) SubSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512
-func (x Int16x8) SubSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512
-func (x Int16x16) SubSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512
-func (x Int16x32) SubSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSB, CPU Feature: AVX512
-func (x Uint8x16) SubSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSB, CPU Feature: AVX512
-func (x Uint8x32) SubSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSB, CPU Feature: AVX512
-func (x Uint8x64) SubSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSW, CPU Feature: AVX512
-func (x Uint16x8) SubSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSW, CPU Feature: AVX512
-func (x Uint16x16) SubSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSW, CPU Feature: AVX512
-func (x Uint16x32) SubSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
 /* Trunc */
 
 // Trunc truncates elements towards zero.
@@ -12266,62 +5872,6 @@ func (x Float64x4) TruncScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) TruncScaled(prec uint8) Float64x8
 
-/* TruncScaledMasked */
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) TruncScaledMasked(prec uint8, mask Mask32x4) Float32x4
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) TruncScaledMasked(prec uint8, mask Mask32x8) Float32x8
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) TruncScaledMasked(prec uint8, mask Mask32x16) Float32x16
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) TruncScaledMasked(prec uint8, mask Mask64x2) Float64x2
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) TruncScaledMasked(prec uint8, mask Mask64x4) Float64x4
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) TruncScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* TruncScaledResidue */
 
 // TruncScaledResidue computes the difference after truncating with specified precision.
@@ -12366,62 +5916,6 @@ func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8
 
-/* TruncScaledResidueMasked */
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) TruncScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) TruncScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) TruncScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) TruncScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) TruncScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) TruncScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* Xor */
 
 // Xor performs a bitwise XOR operation between two vectors.
@@ -12544,92 +6038,6 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4
 // Asm: VPXORQ, CPU Feature: AVX512
 func (x Uint64x8) Xor(y Uint64x8) Uint64x8
 
-/* XorMasked */
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* blend */
 
 // blend blends two vectors based on mask values, choosing either
index 3faeeaccfdefabdb3c18ef701c4dd59150302ff1..c88fe4b9fef58e6ffc42d42e833bdd1d8be47d7b 100644 (file)
@@ -43,7 +43,7 @@ func TestType(t *testing.T) {
                return
        }
        v.z = maskT(simd.Mask32x4FromBits(0b0011))
-       *v.y = v.y.AddMasked(v.x, simd.Mask32x4(v.z))
+       *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z))
 
        got := [4]int32{}
        v.y.Store(&got)
@@ -121,7 +121,7 @@ func TestMaskConversion(t *testing.T) {
        }
        x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
        mask := simd.Int32x4{}.Sub(x).ToMask()
-       y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).AddMasked(x, mask)
+       y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
        want := [4]int32{6, 0, 10, 0}
        got := make([]int32, 4)
        y.StoreSlice(got)
@@ -327,7 +327,7 @@ func TestBitMaskLoad(t *testing.T) {
        results := [2]int64{}
        want := [2]int64{0, 6}
        m := simd.LoadMask64x2FromBits(&bits)
-       simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results)
+       simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
        for i := range 2 {
                if results[i] != want[i] {
                        t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
@@ -359,7 +359,7 @@ func TestBitMaskFromBits(t *testing.T) {
        results := [2]int64{}
        want := [2]int64{0, 6}
        m := simd.Mask64x2FromBits(0b10)
-       simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results)
+       simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
        for i := range 2 {
                if results[i] != want[i] {
                        t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])