This CL also adds tests for them.
This CL is generated by CL 687919.
Change-Id: I9ddd2cd23bb98ecca91bfbeaffd62faa4bd85e0d
Reviewed-on: https://go-review.googlesource.com/c/go/+/687939
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
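
The hunks below wire the simd Permute/Permute2 operations (and their Masked forms) through the AMD64 backend: op lists in the code generator, lowering rules, opcode definitions, and the regenerated opGen tables. As a reference for what the single-table VPERM* instructions (VPERMB/VPERMW/VPERMD/VPERMQ and the PS/PD float forms) compute per lane, here is a scalar sketch in Go; it is an illustration of the instruction semantics, not code from this CL:

// permute is a scalar model of a single-table permute: every
// result lane is a lookup into x, with the index reduced modulo
// the lane count (the instructions use only the low index bits).
package main

import "fmt"

func permute[T any](x []T, idx []uint) []T {
	n := uint(len(x)) // lane count
	out := make([]T, len(idx))
	for i, j := range idx {
		out[i] = x[j%n]
	}
	return out
}

func main() {
	x := []int8{10, 11, 12, 13}
	fmt.Println(permute(x, []uint{3, 3, 0, 1})) // [13 13 10 11]
}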
ssa.OpAMD64VPHSUBW256,
ssa.OpAMD64VPHSUBD128,
ssa.OpAMD64VPHSUBD256,
+ ssa.OpAMD64VPERMB128,
+ ssa.OpAMD64VPERMB256,
+ ssa.OpAMD64VPERMB512,
+ ssa.OpAMD64VPERMW128,
+ ssa.OpAMD64VPERMW256,
+ ssa.OpAMD64VPERMW512,
+ ssa.OpAMD64VPERMPS256,
+ ssa.OpAMD64VPERMD256,
+ ssa.OpAMD64VPERMPS512,
+ ssa.OpAMD64VPERMD512,
+ ssa.OpAMD64VPERMPD256,
+ ssa.OpAMD64VPERMQ256,
+ ssa.OpAMD64VPERMPD512,
+ ssa.OpAMD64VPERMQ512,
ssa.OpAMD64VPROLVD128,
ssa.OpAMD64VPROLVD256,
ssa.OpAMD64VPROLVD512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
+ ssa.OpAMD64VPERMBMasked128,
+ ssa.OpAMD64VPERMBMasked256,
+ ssa.OpAMD64VPERMBMasked512,
+ ssa.OpAMD64VPERMWMasked128,
+ ssa.OpAMD64VPERMWMasked256,
+ ssa.OpAMD64VPERMWMasked512,
+ ssa.OpAMD64VPERMPSMasked256,
+ ssa.OpAMD64VPERMDMasked256,
+ ssa.OpAMD64VPERMPSMasked512,
+ ssa.OpAMD64VPERMDMasked512,
+ ssa.OpAMD64VPERMPDMasked256,
+ ssa.OpAMD64VPERMQMasked256,
+ ssa.OpAMD64VPERMPDMasked512,
+ ssa.OpAMD64VPERMQMasked512,
ssa.OpAMD64VPROLVDMasked128,
ssa.OpAMD64VPROLVDMasked256,
ssa.OpAMD64VPROLVDMasked512,
ssa.OpAMD64VPDPWSSD128,
ssa.OpAMD64VPDPWSSD256,
ssa.OpAMD64VPDPWSSD512,
+ ssa.OpAMD64VPERMI2B128,
+ ssa.OpAMD64VPERMI2B256,
+ ssa.OpAMD64VPERMI2B512,
+ ssa.OpAMD64VPERMI2W128,
+ ssa.OpAMD64VPERMI2W256,
+ ssa.OpAMD64VPERMI2W512,
+ ssa.OpAMD64VPERMI2PS128,
+ ssa.OpAMD64VPERMI2D128,
+ ssa.OpAMD64VPERMI2PS256,
+ ssa.OpAMD64VPERMI2D256,
+ ssa.OpAMD64VPERMI2PS512,
+ ssa.OpAMD64VPERMI2D512,
+ ssa.OpAMD64VPERMI2PD128,
+ ssa.OpAMD64VPERMI2Q128,
+ ssa.OpAMD64VPERMI2PD256,
+ ssa.OpAMD64VPERMI2Q256,
+ ssa.OpAMD64VPERMI2PD512,
+ ssa.OpAMD64VPERMI2Q512,
ssa.OpAMD64VPDPWSSDS128,
ssa.OpAMD64VPDPWSSDS256,
ssa.OpAMD64VPDPWSSDS512,
ssa.OpAMD64VPDPWSSDMasked128,
ssa.OpAMD64VPDPWSSDMasked256,
ssa.OpAMD64VPDPWSSDMasked512,
+ ssa.OpAMD64VPERMI2BMasked128,
+ ssa.OpAMD64VPERMI2BMasked256,
+ ssa.OpAMD64VPERMI2BMasked512,
+ ssa.OpAMD64VPERMI2WMasked128,
+ ssa.OpAMD64VPERMI2WMasked256,
+ ssa.OpAMD64VPERMI2WMasked512,
+ ssa.OpAMD64VPERMI2PSMasked128,
+ ssa.OpAMD64VPERMI2DMasked128,
+ ssa.OpAMD64VPERMI2PSMasked256,
+ ssa.OpAMD64VPERMI2DMasked256,
+ ssa.OpAMD64VPERMI2PSMasked512,
+ ssa.OpAMD64VPERMI2DMasked512,
+ ssa.OpAMD64VPERMI2PDMasked128,
+ ssa.OpAMD64VPERMI2QMasked128,
+ ssa.OpAMD64VPERMI2PDMasked256,
+ ssa.OpAMD64VPERMI2QMasked256,
+ ssa.OpAMD64VPERMI2PDMasked512,
+ ssa.OpAMD64VPERMI2QMasked512,
ssa.OpAMD64VPDPWSSDSMasked128,
ssa.OpAMD64VPDPWSSDSMasked256,
ssa.OpAMD64VPDPWSSDSMasked512,
ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
+ ssa.OpAMD64VPERMI2BMasked128,
+ ssa.OpAMD64VPERMI2BMasked256,
+ ssa.OpAMD64VPERMI2BMasked512,
+ ssa.OpAMD64VPERMI2WMasked128,
+ ssa.OpAMD64VPERMI2WMasked256,
+ ssa.OpAMD64VPERMI2WMasked512,
+ ssa.OpAMD64VPERMI2PSMasked128,
+ ssa.OpAMD64VPERMI2DMasked128,
+ ssa.OpAMD64VPERMI2PSMasked256,
+ ssa.OpAMD64VPERMI2DMasked256,
+ ssa.OpAMD64VPERMI2PSMasked512,
+ ssa.OpAMD64VPERMI2DMasked512,
+ ssa.OpAMD64VPERMI2PDMasked128,
+ ssa.OpAMD64VPERMI2QMasked128,
+ ssa.OpAMD64VPERMI2PDMasked256,
+ ssa.OpAMD64VPERMI2QMasked256,
+ ssa.OpAMD64VPERMI2PDMasked512,
+ ssa.OpAMD64VPERMI2QMasked512,
+ ssa.OpAMD64VPERMBMasked128,
+ ssa.OpAMD64VPERMBMasked256,
+ ssa.OpAMD64VPERMBMasked512,
+ ssa.OpAMD64VPERMWMasked128,
+ ssa.OpAMD64VPERMWMasked256,
+ ssa.OpAMD64VPERMWMasked512,
+ ssa.OpAMD64VPERMPSMasked256,
+ ssa.OpAMD64VPERMDMasked256,
+ ssa.OpAMD64VPERMPSMasked512,
+ ssa.OpAMD64VPERMDMasked512,
+ ssa.OpAMD64VPERMPDMasked256,
+ ssa.OpAMD64VPERMQMasked256,
+ ssa.OpAMD64VPERMPDMasked512,
+ ssa.OpAMD64VPERMQMasked512,
ssa.OpAMD64VPOPCNTBMasked128,
ssa.OpAMD64VPOPCNTBMasked256,
ssa.OpAMD64VPOPCNTBMasked512,
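
The VPERMI2* ops registered above implement the two-table Permute2: each result lane selects from the concatenation of the two table operands, and the hardware instruction reads its indices from the destination register and overwrites them with the result, which is why the opcode definitions later in the diff mark every VPERMI2* op resultInArg0: true. A scalar sketch of what one lane computes (illustrative only):

// permute2 is a scalar model of the two-table VPERMI2* family:
// each result lane selects from the concatenation of x and y,
// using the index modulo twice the lane count; the high index
// bit picks the table.
func permute2[T any](idx []uint, x, y []T) []T {
	n := uint(len(x)) // len(x) == len(y) == lane count
	out := make([]T, len(idx))
	for i, j := range idx {
		j %= 2 * n
		if j < n {
			out[i] = x[j]
		} else {
			out[i] = y[j-n]
		}
	}
	return out
}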
(PairwiseSubUint16x16 ...) => (VPHSUBW256 ...)
(PairwiseSubUint32x4 ...) => (VPHSUBD128 ...)
(PairwiseSubUint32x8 ...) => (VPHSUBD256 ...)
+(PermuteFloat32x8 ...) => (VPERMPS256 ...)
+(PermuteFloat32x16 ...) => (VPERMPS512 ...)
+(PermuteFloat64x4 ...) => (VPERMPD256 ...)
+(PermuteFloat64x8 ...) => (VPERMPD512 ...)
+(PermuteInt8x16 ...) => (VPERMB128 ...)
+(PermuteInt8x32 ...) => (VPERMB256 ...)
+(PermuteInt8x64 ...) => (VPERMB512 ...)
+(PermuteInt16x8 ...) => (VPERMW128 ...)
+(PermuteInt16x16 ...) => (VPERMW256 ...)
+(PermuteInt16x32 ...) => (VPERMW512 ...)
+(PermuteInt32x8 ...) => (VPERMD256 ...)
+(PermuteInt32x16 ...) => (VPERMD512 ...)
+(PermuteInt64x4 ...) => (VPERMQ256 ...)
+(PermuteInt64x8 ...) => (VPERMQ512 ...)
+(PermuteUint8x16 ...) => (VPERMB128 ...)
+(PermuteUint8x32 ...) => (VPERMB256 ...)
+(PermuteUint8x64 ...) => (VPERMB512 ...)
+(PermuteUint16x8 ...) => (VPERMW128 ...)
+(PermuteUint16x16 ...) => (VPERMW256 ...)
+(PermuteUint16x32 ...) => (VPERMW512 ...)
+(PermuteUint32x8 ...) => (VPERMD256 ...)
+(PermuteUint32x16 ...) => (VPERMD512 ...)
+(PermuteUint64x4 ...) => (VPERMQ256 ...)
+(PermuteUint64x8 ...) => (VPERMQ512 ...)
+(Permute2Float32x4 ...) => (VPERMI2PS128 ...)
+(Permute2Float32x8 ...) => (VPERMI2PS256 ...)
+(Permute2Float32x16 ...) => (VPERMI2PS512 ...)
+(Permute2Float64x2 ...) => (VPERMI2PD128 ...)
+(Permute2Float64x4 ...) => (VPERMI2PD256 ...)
+(Permute2Float64x8 ...) => (VPERMI2PD512 ...)
+(Permute2Int8x16 ...) => (VPERMI2B128 ...)
+(Permute2Int8x32 ...) => (VPERMI2B256 ...)
+(Permute2Int8x64 ...) => (VPERMI2B512 ...)
+(Permute2Int16x8 ...) => (VPERMI2W128 ...)
+(Permute2Int16x16 ...) => (VPERMI2W256 ...)
+(Permute2Int16x32 ...) => (VPERMI2W512 ...)
+(Permute2Int32x4 ...) => (VPERMI2D128 ...)
+(Permute2Int32x8 ...) => (VPERMI2D256 ...)
+(Permute2Int32x16 ...) => (VPERMI2D512 ...)
+(Permute2Int64x2 ...) => (VPERMI2Q128 ...)
+(Permute2Int64x4 ...) => (VPERMI2Q256 ...)
+(Permute2Int64x8 ...) => (VPERMI2Q512 ...)
+(Permute2Uint8x16 ...) => (VPERMI2B128 ...)
+(Permute2Uint8x32 ...) => (VPERMI2B256 ...)
+(Permute2Uint8x64 ...) => (VPERMI2B512 ...)
+(Permute2Uint16x8 ...) => (VPERMI2W128 ...)
+(Permute2Uint16x16 ...) => (VPERMI2W256 ...)
+(Permute2Uint16x32 ...) => (VPERMI2W512 ...)
+(Permute2Uint32x4 ...) => (VPERMI2D128 ...)
+(Permute2Uint32x8 ...) => (VPERMI2D256 ...)
+(Permute2Uint32x16 ...) => (VPERMI2D512 ...)
+(Permute2Uint64x2 ...) => (VPERMI2Q128 ...)
+(Permute2Uint64x4 ...) => (VPERMI2Q256 ...)
+(Permute2Uint64x8 ...) => (VPERMI2Q512 ...)
+(Permute2MaskedFloat32x4 x y z mask) => (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Permute2MaskedFloat32x8 x y z mask) => (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(Permute2MaskedFloat32x16 x y z mask) => (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(Permute2MaskedFloat64x2 x y z mask) => (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Permute2MaskedFloat64x4 x y z mask) => (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+(Permute2MaskedFloat64x8 x y z mask) => (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+(Permute2MaskedInt8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Permute2MaskedInt8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+(Permute2MaskedInt8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+(Permute2MaskedInt16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Permute2MaskedInt16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+(Permute2MaskedInt16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+(Permute2MaskedInt32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Permute2MaskedInt32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(Permute2MaskedInt32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(Permute2MaskedInt64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Permute2MaskedInt64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+(Permute2MaskedInt64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+(Permute2MaskedUint8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Permute2MaskedUint8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+(Permute2MaskedUint8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+(Permute2MaskedUint16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Permute2MaskedUint16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+(Permute2MaskedUint16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+(Permute2MaskedUint32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Permute2MaskedUint32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(Permute2MaskedUint32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(Permute2MaskedUint64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Permute2MaskedUint64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+(Permute2MaskedUint64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+(PermuteMaskedFloat32x8 x y mask) => (VPERMPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(PermuteMaskedFloat32x16 x y mask) => (VPERMPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(PermuteMaskedFloat64x4 x y mask) => (VPERMPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(PermuteMaskedFloat64x8 x y mask) => (VPERMPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(PermuteMaskedInt8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(PermuteMaskedInt8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(PermuteMaskedInt8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(PermuteMaskedInt16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(PermuteMaskedInt16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(PermuteMaskedInt16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(PermuteMaskedInt32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(PermuteMaskedInt32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(PermuteMaskedInt64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(PermuteMaskedInt64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(PermuteMaskedUint8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(PermuteMaskedUint8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(PermuteMaskedUint8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(PermuteMaskedUint16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(PermuteMaskedUint16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(PermuteMaskedUint16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(PermuteMaskedUint32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(PopCountInt8x16 ...) => (VPOPCNTB128 ...)
(PopCountInt8x32 ...) => (VPOPCNTB256 ...)
(PopCountInt8x64 ...) => (VPOPCNTB512 ...)
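
Each Masked rule above lowers to the masked machine op plus a VPMOVVec*ToM conversion that moves the vector-typed mask into a K register. The single-table VPERM*Masked ops are not resultInArg0, so there is no old destination value to merge with; the sketch below therefore assumes zeroing semantics for inactive lanes (an assumption, not spelled out in this hunk):

// permuteMasked is a scalar sketch of the masked single-table
// permute, assuming inactive lanes are zeroed rather than merged.
func permuteMasked[T any](x []T, idx []uint, mask []bool) []T {
	n := uint(len(x))
	out := make([]T, len(idx)) // every lane starts at the zero value
	for i, j := range idx {
		if mask[i] {
			out[i] = x[j%n]
		}
	}
	return out
}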
{name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUW256", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMW256", argLength: 2, reg: w21, asm: "VPERMW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMI2W256", argLength: 3, reg: w31, asm: "VPERMI2W", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2WMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2W", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMWMasked256", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLW256", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLWMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLVW256", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUW512", argLength: 2, reg: w21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMW512", argLength: 2, reg: w21, asm: "VPERMW", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMI2W512", argLength: 3, reg: w31, asm: "VPERMI2W", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2WMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2W", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMWMasked512", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLW512", argLength: 2, reg: wfpw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLWMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLVW512", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPERMW128", argLength: 2, reg: w21, asm: "VPERMW", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPERMI2W128", argLength: 3, reg: w31, asm: "VPERMI2W", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2WMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2W", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMWMasked128", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLW128", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLWMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLVW128", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUD512", argLength: 2, reg: w21, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUDMasked512", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMPS512", argLength: 2, reg: w21, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMD512", argLength: 2, reg: w21, asm: "VPERMD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMI2D512", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2PS512", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2DMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2PSMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMPSMasked512", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMDMasked512", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLD512", argLength: 2, reg: wfpw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLDMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLVD512", argLength: 2, reg: w21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMINUD128", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUDMasked128", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPERMI2D128", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2PS128", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2PSMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2DMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMPS256", argLength: 2, reg: v21, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMI2D256", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2PS256", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2PSMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2DMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMPSMasked256", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMDMasked256", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLDMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLVD256", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMINUQ128", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUQMasked128", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPERMI2PD128", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2Q128", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2QMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2PDMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMINUQ256", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUQMasked256", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMQ256", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMPD256", argLength: 2, reg: w21, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMI2PD256", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2Q256", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2PDMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2QMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMPDMasked256", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMQMasked256", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMINUQMasked512", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULUDQ512", argLength: 2, reg: w21, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULUDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMPD512", argLength: 2, reg: w21, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMQ512", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMI2Q512", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2PD512", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2QMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2PDMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMPDMasked512", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMQMasked512", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUBMasked128", argLength: 3, reg: w2kw, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUB128", argLength: 2, reg: v21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUBMasked128", argLength: 3, reg: w2kw, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPERMB128", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPERMI2B128", argLength: 3, reg: w31, asm: "VPERMI2B", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2BMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2B", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMBMasked128", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSW128", argLength: 2, reg: v21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMADDUBSWMasked128", argLength: 3, reg: w2kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPAVGB256", argLength: 2, reg: v21, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUBMasked256", argLength: 3, reg: w2kw, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUB256", argLength: 2, reg: v21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUBMasked256", argLength: 3, reg: w2kw, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMB256", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPERMI2B256", argLength: 3, reg: w31, asm: "VPERMI2B", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2BMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2B", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMBMasked256", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSW256", argLength: 2, reg: v21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMADDUBSWMasked256", argLength: 3, reg: w2kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPAVGB512", argLength: 2, reg: w21, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUBMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUB512", argLength: 2, reg: w21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUBMasked512", argLength: 3, reg: w2kw, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMB512", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMI2B512", argLength: 3, reg: w31, asm: "VPERMI2B", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMI2BMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2B", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPERMBMasked512", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSW512", argLength: 2, reg: w21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMADDUBSWMasked512", argLength: 3, reg: w2kw, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VRNDSCALEPS512", argLength: 1, reg: w11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "OrUint16x16", argLength: 2, commutative: true},
{name: "PairwiseAddUint16x16", argLength: 2, commutative: false},
{name: "PairwiseSubUint16x16", argLength: 2, commutative: false},
+ {name: "PermuteInt16x16", argLength: 2, commutative: false},
+ {name: "PermuteUint16x16", argLength: 2, commutative: false},
+ {name: "Permute2Uint16x16", argLength: 3, commutative: false},
+ {name: "Permute2Int16x16", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
+ {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
+ {name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
{name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
{name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
{name: "NotEqualUint16x32", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
+ {name: "PermuteUint16x32", argLength: 2, commutative: false},
+ {name: "PermuteInt16x32", argLength: 2, commutative: false},
+ {name: "Permute2Int16x32", argLength: 3, commutative: false},
+ {name: "Permute2Uint16x32", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
+ {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
+ {name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
{name: "PopCountUint16x32", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x32", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
{name: "OrUint16x8", argLength: 2, commutative: true},
{name: "PairwiseAddUint16x8", argLength: 2, commutative: false},
{name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
+ {name: "PermuteUint16x8", argLength: 2, commutative: false},
+ {name: "PermuteInt16x8", argLength: 2, commutative: false},
+ {name: "Permute2Int16x8", argLength: 3, commutative: false},
+ {name: "Permute2Uint16x8", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
+ {name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
{name: "PopCountUint16x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
{name: "OrUint32x16", argLength: 2, commutative: true},
{name: "OrMaskedUint32x16", argLength: 3, commutative: true},
+ {name: "PermuteInt32x16", argLength: 2, commutative: false},
+ {name: "PermuteUint32x16", argLength: 2, commutative: false},
+ {name: "PermuteFloat32x16", argLength: 2, commutative: false},
+ {name: "Permute2Int32x16", argLength: 3, commutative: false},
+ {name: "Permute2Uint32x16", argLength: 3, commutative: false},
+ {name: "Permute2Float32x16", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
+ {name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
+ {name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
+ {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
+ {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
{name: "PopCountUint32x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
{name: "RotateLeftUint32x16", argLength: 2, commutative: false},
{name: "OrMaskedUint32x4", argLength: 3, commutative: true},
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
+ {name: "Permute2Uint32x4", argLength: 3, commutative: false},
+ {name: "Permute2Float32x4", argLength: 3, commutative: false},
+ {name: "Permute2Int32x4", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
+ {name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
{name: "PopCountUint32x4", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
{name: "RotateLeftUint32x4", argLength: 2, commutative: false},
{name: "OrMaskedUint32x8", argLength: 3, commutative: true},
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
+ {name: "PermuteInt32x8", argLength: 2, commutative: false},
+ {name: "PermuteFloat32x8", argLength: 2, commutative: false},
+ {name: "PermuteUint32x8", argLength: 2, commutative: false},
+ {name: "Permute2Uint32x8", argLength: 3, commutative: false},
+ {name: "Permute2Float32x8", argLength: 3, commutative: false},
+ {name: "Permute2Int32x8", argLength: 3, commutative: false},
+ {name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
+ {name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
+ {name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
{name: "PopCountUint32x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x8", argLength: 2, commutative: false},
{name: "RotateLeftUint32x8", argLength: 2, commutative: false},
{name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
{name: "OrUint64x2", argLength: 2, commutative: true},
{name: "OrMaskedUint64x2", argLength: 3, commutative: true},
+ {name: "Permute2Uint64x2", argLength: 3, commutative: false},
+ {name: "Permute2Int64x2", argLength: 3, commutative: false},
+ {name: "Permute2Float64x2", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
+ {name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
{name: "PopCountUint64x2", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
{name: "RotateLeftUint64x2", argLength: 2, commutative: false},
{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
{name: "OrUint64x4", argLength: 2, commutative: true},
{name: "OrMaskedUint64x4", argLength: 3, commutative: true},
+ {name: "PermuteUint64x4", argLength: 2, commutative: false},
+ {name: "PermuteInt64x4", argLength: 2, commutative: false},
+ {name: "PermuteFloat64x4", argLength: 2, commutative: false},
+ {name: "Permute2Uint64x4", argLength: 3, commutative: false},
+ {name: "Permute2Int64x4", argLength: 3, commutative: false},
+ {name: "Permute2Float64x4", argLength: 3, commutative: false},
+ {name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
+ {name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
+ {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
+ {name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
+ {name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
{name: "PopCountUint64x4", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
{name: "OrUint64x8", argLength: 2, commutative: true},
{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
+ {name: "PermuteUint64x8", argLength: 2, commutative: false},
+ {name: "PermuteInt64x8", argLength: 2, commutative: false},
+ {name: "PermuteFloat64x8", argLength: 2, commutative: false},
+ {name: "Permute2Int64x8", argLength: 3, commutative: false},
+ {name: "Permute2Uint64x8", argLength: 3, commutative: false},
+ {name: "Permute2Float64x8", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
+ {name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
+ {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
{name: "PopCountUint64x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
{name: "NotEqualUint8x16", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint8x16", argLength: 3, commutative: true},
{name: "OrUint8x16", argLength: 2, commutative: true},
+ {name: "PermuteUint8x16", argLength: 2, commutative: false},
+ {name: "PermuteInt8x16", argLength: 2, commutative: false},
+ {name: "Permute2Uint8x16", argLength: 3, commutative: false},
+ {name: "Permute2Int8x16", argLength: 3, commutative: false},
+ {name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
+ {name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
+ {name: "PermuteMaskedInt8x16", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
{name: "PopCountUint8x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint8x16", argLength: 2, commutative: false},
{name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
{name: "NotEqualUint8x32", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint8x32", argLength: 3, commutative: true},
{name: "OrUint8x32", argLength: 2, commutative: true},
+ {name: "PermuteUint8x32", argLength: 2, commutative: false},
+ {name: "PermuteInt8x32", argLength: 2, commutative: false},
+ {name: "Permute2Int8x32", argLength: 3, commutative: false},
+ {name: "Permute2Uint8x32", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
+ {name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
+ {name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
{name: "PopCountUint8x32", argLength: 1, commutative: false},
{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
{name: "MinMaskedUint8x64", argLength: 3, commutative: true},
{name: "NotEqualUint8x64", argLength: 2, commutative: true},
{name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true},
+ {name: "PermuteUint8x64", argLength: 2, commutative: false},
+ {name: "PermuteInt8x64", argLength: 2, commutative: false},
+ {name: "Permute2Int8x64", argLength: 3, commutative: false},
+ {name: "Permute2Uint8x64", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint8x64", argLength: 4, commutative: false},
+ {name: "Permute2MaskedInt8x64", argLength: 4, commutative: false},
+ {name: "PermuteMaskedInt8x64", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint8x64", argLength: 3, commutative: false},
{name: "PopCountUint8x64", argLength: 1, commutative: false},
{name: "PopCountMaskedUint8x64", argLength: 2, commutative: false},
{name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
OpAMD64VPMINUWMasked256
OpAMD64VPMULHUW256
OpAMD64VPMULHUWMasked256
+ OpAMD64VPERMW256
+ OpAMD64VPERMI2W256
+ OpAMD64VPERMI2WMasked256
+ OpAMD64VPERMWMasked256
OpAMD64VPSRLW256
OpAMD64VPSRLWMasked256
OpAMD64VPSRLVW256
OpAMD64VPMINUWMasked512
OpAMD64VPMULHUW512
OpAMD64VPMULHUWMasked512
+ OpAMD64VPERMW512
+ OpAMD64VPERMI2W512
+ OpAMD64VPERMI2WMasked512
+ OpAMD64VPERMWMasked512
OpAMD64VPSRLW512
OpAMD64VPSRLWMasked512
OpAMD64VPSRLVW512
OpAMD64VPMINUWMasked128
OpAMD64VPMULHUW128
OpAMD64VPMULHUWMasked128
+ OpAMD64VPERMW128
+ OpAMD64VPERMI2W128
+ OpAMD64VPERMI2WMasked128
+ OpAMD64VPERMWMasked128
OpAMD64VPSRLW128
OpAMD64VPSRLWMasked128
OpAMD64VPSRLVW128
OpAMD64VPMAXUDMasked512
OpAMD64VPMINUD512
OpAMD64VPMINUDMasked512
+ OpAMD64VPERMPS512
+ OpAMD64VPERMD512
+ OpAMD64VPERMI2D512
+ OpAMD64VPERMI2PS512
+ OpAMD64VPERMI2DMasked512
+ OpAMD64VPERMI2PSMasked512
+ OpAMD64VPERMPSMasked512
+ OpAMD64VPERMDMasked512
OpAMD64VPSRLD512
OpAMD64VPSRLDMasked512
OpAMD64VPSRLVD512
OpAMD64VPMINUD128
OpAMD64VPMINUDMasked128
OpAMD64VPMULUDQ128
+ OpAMD64VPERMI2D128
+ OpAMD64VPERMI2PS128
+ OpAMD64VPERMI2PSMasked128
+ OpAMD64VPERMI2DMasked128
OpAMD64VPSRLD128
OpAMD64VPSRLDMasked128
OpAMD64VPSRLVD128
OpAMD64VPMINUD256
OpAMD64VPMINUDMasked256
OpAMD64VPMULUDQ256
+ OpAMD64VPERMD256
+ OpAMD64VPERMPS256
+ OpAMD64VPERMI2D256
+ OpAMD64VPERMI2PS256
+ OpAMD64VPERMI2PSMasked256
+ OpAMD64VPERMI2DMasked256
+ OpAMD64VPERMPSMasked256
+ OpAMD64VPERMDMasked256
OpAMD64VPSRLD256
OpAMD64VPSRLDMasked256
OpAMD64VPSRLVD256
OpAMD64VPMINUQ128
OpAMD64VPMINUQMasked128
OpAMD64VPMULUDQMasked128
+ OpAMD64VPERMI2PD128
+ OpAMD64VPERMI2Q128
+ OpAMD64VPERMI2QMasked128
+ OpAMD64VPERMI2PDMasked128
OpAMD64VPSRLQ128
OpAMD64VPSRLQMasked128
OpAMD64VPSRLVQ128
OpAMD64VPMINUQ256
OpAMD64VPMINUQMasked256
OpAMD64VPMULUDQMasked256
+ OpAMD64VPERMQ256
+ OpAMD64VPERMPD256
+ OpAMD64VPERMI2PD256
+ OpAMD64VPERMI2Q256
+ OpAMD64VPERMI2PDMasked256
+ OpAMD64VPERMI2QMasked256
+ OpAMD64VPERMPDMasked256
+ OpAMD64VPERMQMasked256
OpAMD64VPSRLQ256
OpAMD64VPSRLQMasked256
OpAMD64VPSRLVQ256
OpAMD64VPMINUQMasked512
OpAMD64VPMULUDQ512
OpAMD64VPMULUDQMasked512
+ OpAMD64VPERMPD512
+ OpAMD64VPERMQ512
+ OpAMD64VPERMI2Q512
+ OpAMD64VPERMI2PD512
+ OpAMD64VPERMI2QMasked512
+ OpAMD64VPERMI2PDMasked512
+ OpAMD64VPERMPDMasked512
+ OpAMD64VPERMQMasked512
OpAMD64VPSRLQ512
OpAMD64VPSRLQMasked512
OpAMD64VPSRLVQ512
OpAMD64VPMAXUBMasked128
OpAMD64VPMINUB128
OpAMD64VPMINUBMasked128
+ OpAMD64VPERMB128
+ OpAMD64VPERMI2B128
+ OpAMD64VPERMI2BMasked128
+ OpAMD64VPERMBMasked128
OpAMD64VPMADDUBSW128
OpAMD64VPMADDUBSWMasked128
OpAMD64VPAVGB256
OpAMD64VPMAXUBMasked256
OpAMD64VPMINUB256
OpAMD64VPMINUBMasked256
+ OpAMD64VPERMB256
+ OpAMD64VPERMI2B256
+ OpAMD64VPERMI2BMasked256
+ OpAMD64VPERMBMasked256
OpAMD64VPMADDUBSW256
OpAMD64VPMADDUBSWMasked256
OpAMD64VPAVGB512
OpAMD64VPMAXUBMasked512
OpAMD64VPMINUB512
OpAMD64VPMINUBMasked512
+ OpAMD64VPERMB512
+ OpAMD64VPERMI2B512
+ OpAMD64VPERMI2BMasked512
+ OpAMD64VPERMBMasked512
OpAMD64VPMADDUBSW512
OpAMD64VPMADDUBSWMasked512
OpAMD64VRNDSCALEPS512
OpOrUint16x16
OpPairwiseAddUint16x16
OpPairwiseSubUint16x16
+ OpPermuteInt16x16
+ OpPermuteUint16x16
+ OpPermute2Uint16x16
+ OpPermute2Int16x16
+ OpPermute2MaskedUint16x16
+ OpPermute2MaskedInt16x16
+ OpPermuteMaskedUint16x16
+ OpPermuteMaskedInt16x16
OpPopCountUint16x16
OpPopCountMaskedUint16x16
OpSaturatedAddUint16x16
OpMulHighMaskedUint16x32
OpNotEqualUint16x32
OpNotEqualMaskedUint16x32
+ OpPermuteUint16x32
+ OpPermuteInt16x32
+ OpPermute2Int16x32
+ OpPermute2Uint16x32
+ OpPermute2MaskedUint16x32
+ OpPermute2MaskedInt16x32
+ OpPermuteMaskedUint16x32
+ OpPermuteMaskedInt16x32
OpPopCountUint16x32
OpPopCountMaskedUint16x32
OpSaturatedAddUint16x32
OpOrUint16x8
OpPairwiseAddUint16x8
OpPairwiseSubUint16x8
+ OpPermuteUint16x8
+ OpPermuteInt16x8
+ OpPermute2Int16x8
+ OpPermute2Uint16x8
+ OpPermute2MaskedUint16x8
+ OpPermute2MaskedInt16x8
+ OpPermuteMaskedInt16x8
+ OpPermuteMaskedUint16x8
OpPopCountUint16x8
OpPopCountMaskedUint16x8
OpSaturatedAddUint16x8
OpNotEqualMaskedUint32x16
OpOrUint32x16
OpOrMaskedUint32x16
+ OpPermuteInt32x16
+ OpPermuteUint32x16
+ OpPermuteFloat32x16
+ OpPermute2Int32x16
+ OpPermute2Uint32x16
+ OpPermute2Float32x16
+ OpPermute2MaskedUint32x16
+ OpPermute2MaskedInt32x16
+ OpPermute2MaskedFloat32x16
+ OpPermuteMaskedUint32x16
+ OpPermuteMaskedInt32x16
+ OpPermuteMaskedFloat32x16
OpPopCountUint32x16
OpPopCountMaskedUint32x16
OpRotateLeftUint32x16
OpOrMaskedUint32x4
OpPairwiseAddUint32x4
OpPairwiseSubUint32x4
+ OpPermute2Uint32x4
+ OpPermute2Float32x4
+ OpPermute2Int32x4
+ OpPermute2MaskedUint32x4
+ OpPermute2MaskedInt32x4
+ OpPermute2MaskedFloat32x4
OpPopCountUint32x4
OpPopCountMaskedUint32x4
OpRotateLeftUint32x4
OpOrMaskedUint32x8
OpPairwiseAddUint32x8
OpPairwiseSubUint32x8
+ OpPermuteInt32x8
+ OpPermuteFloat32x8
+ OpPermuteUint32x8
+ OpPermute2Uint32x8
+ OpPermute2Float32x8
+ OpPermute2Int32x8
+ OpPermute2MaskedFloat32x8
+ OpPermute2MaskedUint32x8
+ OpPermute2MaskedInt32x8
+ OpPermuteMaskedInt32x8
+ OpPermuteMaskedFloat32x8
+ OpPermuteMaskedUint32x8
OpPopCountUint32x8
OpPopCountMaskedUint32x8
OpRotateLeftUint32x8
OpNotEqualMaskedUint64x2
OpOrUint64x2
OpOrMaskedUint64x2
+ OpPermute2Uint64x2
+ OpPermute2Int64x2
+ OpPermute2Float64x2
+ OpPermute2MaskedUint64x2
+ OpPermute2MaskedInt64x2
+ OpPermute2MaskedFloat64x2
OpPopCountUint64x2
OpPopCountMaskedUint64x2
OpRotateLeftUint64x2
OpNotEqualMaskedUint64x4
OpOrUint64x4
OpOrMaskedUint64x4
+ OpPermuteUint64x4
+ OpPermuteInt64x4
+ OpPermuteFloat64x4
+ OpPermute2Uint64x4
+ OpPermute2Int64x4
+ OpPermute2Float64x4
+ OpPermute2MaskedInt64x4
+ OpPermute2MaskedUint64x4
+ OpPermute2MaskedFloat64x4
+ OpPermuteMaskedFloat64x4
+ OpPermuteMaskedInt64x4
+ OpPermuteMaskedUint64x4
OpPopCountUint64x4
OpPopCountMaskedUint64x4
OpRotateLeftUint64x4
OpNotEqualMaskedUint64x8
OpOrUint64x8
OpOrMaskedUint64x8
+ OpPermuteUint64x8
+ OpPermuteInt64x8
+ OpPermuteFloat64x8
+ OpPermute2Int64x8
+ OpPermute2Uint64x8
+ OpPermute2Float64x8
+ OpPermute2MaskedUint64x8
+ OpPermute2MaskedInt64x8
+ OpPermute2MaskedFloat64x8
+ OpPermuteMaskedFloat64x8
+ OpPermuteMaskedInt64x8
+ OpPermuteMaskedUint64x8
OpPopCountUint64x8
OpPopCountMaskedUint64x8
OpRotateLeftUint64x8
OpNotEqualUint8x16
OpNotEqualMaskedUint8x16
OpOrUint8x16
+ OpPermuteUint8x16
+ OpPermuteInt8x16
+ OpPermute2Uint8x16
+ OpPermute2Int8x16
+ OpPermute2MaskedInt8x16
+ OpPermute2MaskedUint8x16
+ OpPermuteMaskedInt8x16
+ OpPermuteMaskedUint8x16
OpPopCountUint8x16
OpPopCountMaskedUint8x16
OpSaturatedAddUint8x16
OpNotEqualUint8x32
OpNotEqualMaskedUint8x32
OpOrUint8x32
+ OpPermuteUint8x32
+ OpPermuteInt8x32
+ OpPermute2Int8x32
+ OpPermute2Uint8x32
+ OpPermute2MaskedUint8x32
+ OpPermute2MaskedInt8x32
+ OpPermuteMaskedUint8x32
+ OpPermuteMaskedInt8x32
OpPopCountUint8x32
OpPopCountMaskedUint8x32
OpSaturatedAddUint8x32
OpMinMaskedUint8x64
OpNotEqualUint8x64
OpNotEqualMaskedUint8x64
+ OpPermuteUint8x64
+ OpPermuteInt8x64
+ OpPermute2Int8x64
+ OpPermute2Uint8x64
+ OpPermute2MaskedUint8x64
+ OpPermute2MaskedInt8x64
+ OpPermuteMaskedInt8x64
+ OpPermuteMaskedUint8x64
OpPopCountUint8x64
OpPopCountMaskedUint8x64
OpSaturatedAddUint8x64
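
The inputInfo/outputInfo pairs in the regenerated table below encode allowed registers as bitmasks over the backend's register numbering. Assuming the usual amd64 numbering (GP registers at bits 0-15, X0-X31 at bits 16-47, K0-K7 at bits 48-55), 2147418112 is X0-X14, 281472829161472 adds X16-X31, and 71494644084506624 is K1-K7, which agrees with the generated comments. A small decoder sketch:

// decodeMask expands a regInfo bitmask into register names under
// the numbering assumed above. X15 never appears in these masks;
// the Go internal ABI reserves it as the zero register.
package main

import "fmt"

func decodeMask(m uint64) []string {
	var names []string
	for b := 0; b < 64; b++ {
		if m&(1<<b) == 0 {
			continue
		}
		switch {
		case b >= 16 && b < 48:
			names = append(names, fmt.Sprintf("X%d", b-16))
		case b >= 48 && b < 56:
			names = append(names, fmt.Sprintf("K%d", b-48))
		}
	}
	return names
}

func main() {
	fmt.Println(decodeMask(2147418112))        // X0 through X14
	fmt.Println(decodeMask(281472829161472))   // X0-X14 and X16-X31
	fmt.Println(decodeMask(71494644084506624)) // K1 through K7
}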
},
},
},
+ {
+ name: "VPERMW256",
+ argLen: 2,
+ asm: x86.AVPERMW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2W256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2W,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2WMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2W,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMWMasked256",
+ argLen: 3,
+ asm: x86.AVPERMW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLW256",
argLen: 2,
},
},
},
+ {
+ name: "VPERMW512",
+ argLen: 2,
+ asm: x86.AVPERMW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2W512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2W,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2WMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2W,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMWMasked512",
+ argLen: 3,
+ asm: x86.AVPERMW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLW512",
argLen: 2,
},
},
},
+ {
+ name: "VPERMW128",
+ argLen: 2,
+ asm: x86.AVPERMW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2W128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2W,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2WMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2W,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMWMasked128",
+ argLen: 3,
+ asm: x86.AVPERMW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLW128",
argLen: 2,
},
},
},
+ {
+ name: "VPERMPS512",
+ argLen: 2,
+ asm: x86.AVPERMPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMD512",
+ argLen: 2,
+ asm: x86.AVPERMD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2D512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2D,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PS512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2DMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2D,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PSMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMPSMasked512",
+ argLen: 3,
+ asm: x86.AVPERMPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMDMasked512",
+ argLen: 3,
+ asm: x86.AVPERMD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLD512",
argLen: 2,
},
},
},
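
The VPERMI2 family is a two-table permute, which is why these entries carry argLen 3 and resultInArg0: the index vector arrives in the destination register and is overwritten with the result. A scalar model of the lane semantics (a sketch; the x y z operand order matches the rewrite rules later in this CL, with the indices in x and the two tables in y and z):

// permute2 models a two-table permute over n-lane vectors: lane i of
// the result is element indices[i] of the 2n-lane concatenation of
// tables a and b; only the low log2(2n) bits of each index are used.
func permute2(indices, a, b []uint32) []uint32 {
	n := len(a)
	out := make([]uint32, n)
	for i, idx := range indices {
		j := int(idx) & (2*n - 1)
		if j < n {
			out[i] = a[j]
		} else {
			out[i] = b[j-n]
		}
	}
	return out
}
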
+ {
+ name: "VPERMI2D128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2D,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PS128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PSMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2DMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2D,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLD128",
argLen: 2,
},
},
},
+ {
+ name: "VPERMD256",
+ argLen: 2,
+ asm: x86.AVPERMD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMPS256",
+ argLen: 2,
+ asm: x86.AVPERMPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2D256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2D,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PS256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PSMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2DMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2D,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMPSMasked256",
+ argLen: 3,
+ asm: x86.AVPERMPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMDMasked256",
+ argLen: 3,
+ asm: x86.AVPERMD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLD256",
argLen: 2,
},
},
},
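
The unmasked single-table forms (VPERMB/VPERMW/VPERMD/VPERMQ and their PS/PD float counterparts) take only an index vector and one table, hence argLen 2 and no resultInArg0. The corresponding scalar sketch (assuming, per the hardware behavior, that each index is reduced to its low bits):

// permute models a one-table permute: lane i of the result is
// element indices[i] of x, with the index taken modulo the lane
// count n (a power of two for every op in this table).
func permute(indices, x []uint32) []uint32 {
	n := len(x)
	out := make([]uint32, n)
	for i, idx := range indices {
		out[i] = x[int(idx)&(n-1)]
	}
	return out
}
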
+ {
+ name: "VPERMI2PD128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2Q128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2Q,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2QMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2Q,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PDMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLQ128",
argLen: 2,
},
},
},
+ {
+ name: "VPERMQ256",
+ argLen: 2,
+ asm: x86.AVPERMQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMPD256",
+ argLen: 2,
+ asm: x86.AVPERMPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2PD256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2Q256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2Q,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PDMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2QMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2Q,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMPDMasked256",
+ argLen: 3,
+ asm: x86.AVPERMPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMQMasked256",
+ argLen: 3,
+ asm: x86.AVPERMQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLQ256",
argLen: 2,
},
},
},
+ {
+ name: "VPERMPD512",
+ argLen: 2,
+ asm: x86.AVPERMPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMQ512",
+ argLen: 2,
+ asm: x86.AVPERMQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2Q512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2Q,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PD512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2QMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2Q,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2PDMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2PD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMPDMasked512",
+ argLen: 3,
+ asm: x86.AVPERMPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMQMasked512",
+ argLen: 3,
+ asm: x86.AVPERMQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSRLQ512",
argLen: 2,
},
},
},
+ {
+ name: "VPERMB128",
+ argLen: 2,
+ asm: x86.AVPERMB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2B128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2B,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2BMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2B,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMBMasked128",
+ argLen: 3,
+ asm: x86.AVPERMB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPMADDUBSW128",
argLen: 2,
},
},
},
+ {
+ name: "VPERMB256",
+ argLen: 2,
+ asm: x86.AVPERMB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2B256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2B,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2BMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2B,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMBMasked256",
+ argLen: 3,
+ asm: x86.AVPERMB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPMADDUBSW256",
argLen: 2,
},
},
},
+ {
+ name: "VPERMB512",
+ argLen: 2,
+ asm: x86.AVPERMB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPERMI2B512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPERMI2B,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMI2BMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPERMI2B,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPERMBMasked512",
+ argLen: 3,
+ asm: x86.AVPERMB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
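
The Masked variants add one trailing K-register input drawn from the K1–K7 mask above. As a sketch of the assumed zeroing semantics (whether the lowering uses zeroing or merging masking is decided by the rule generator, not by this table):

// permuteMasked models a masked one-table permute with zeroing:
// lanes whose mask bit is clear produce zero. A merging form would
// instead preserve the destination's previous lane value.
func permuteMasked(indices, x []uint32, mask uint64) []uint32 {
	n := len(x)
	out := make([]uint32, n)
	for i, idx := range indices {
		if mask&(1<<uint(i)) != 0 {
			out[i] = x[int(idx)&(n-1)]
		}
	}
	return out
}
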
{
name: "VPMADDUBSW512",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "PermuteInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteUint16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint16x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Int16x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint16x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt16x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint16x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt16x16",
+ argLen: 3,
+ generic: true,
+ },
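
The machine-independent ops added below follow a single naming scheme, Permute{2}{Masked}<Elem>x<Lanes>, and their argLen values line up with the AMD64 entries above: 2 for a plain permute (indices plus one table), 3 for either a masked permute or a two-table Permute2, and 4 for a masked two-table Permute2Masked.
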
{
name: "PopCountUint16x16",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "PermuteUint16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteInt16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Int16x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint16x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint16x32",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt16x32",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint16x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt16x32",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint16x32",
argLen: 1,
argLen: 2,
generic: true,
},
+ {
+ name: "PermuteUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Int16x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint16x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint16x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt16x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt16x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint16x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint16x8",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "PermuteInt32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteUint32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteFloat32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Int32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Float32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedFloat32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedFloat32x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint32x16",
argLen: 1,
argLen: 2,
generic: true,
},
+ {
+ name: "Permute2Uint32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Float32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Int32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedFloat32x4",
+ argLen: 4,
+ generic: true,
+ },
{
name: "PopCountUint32x4",
argLen: 1,
argLen: 2,
generic: true,
},
+ {
+ name: "PermuteInt32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteFloat32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteUint32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Float32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Int32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedFloat32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedFloat32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint32x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint32x8",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "Permute2Uint64x2",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Int64x2",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Float64x2",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint64x2",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt64x2",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedFloat64x2",
+ argLen: 4,
+ generic: true,
+ },
{
name: "PopCountUint64x2",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "PermuteUint64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteInt64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteFloat64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Int64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Float64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt64x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint64x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedFloat64x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedFloat64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint64x4",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint64x4",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "PermuteUint64x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteInt64x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteFloat64x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Int64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Float64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint64x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt64x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedFloat64x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedFloat64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint64x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint64x8",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "PermuteUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint8x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Int8x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt8x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint8x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt8x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint8x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint8x16",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "PermuteUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteInt8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Int8x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint8x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint8x32",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt8x32",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint8x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt8x32",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint8x32",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "PermuteUint8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "PermuteInt8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Permute2Int8x64",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2Uint8x64",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedUint8x64",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "Permute2MaskedInt8x64",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedInt8x64",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "PermuteMaskedUint8x64",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountUint8x64",
argLen: 1,
return true
case OpPanicBounds:
return rewriteValueAMD64_OpPanicBounds(v)
+ case OpPermute2Float32x16:
+ v.Op = OpAMD64VPERMI2PS512
+ return true
+ case OpPermute2Float32x4:
+ v.Op = OpAMD64VPERMI2PS128
+ return true
+ case OpPermute2Float32x8:
+ v.Op = OpAMD64VPERMI2PS256
+ return true
+ case OpPermute2Float64x2:
+ v.Op = OpAMD64VPERMI2PD128
+ return true
+ case OpPermute2Float64x4:
+ v.Op = OpAMD64VPERMI2PD256
+ return true
+ case OpPermute2Float64x8:
+ v.Op = OpAMD64VPERMI2PD512
+ return true
+ case OpPermute2Int16x16:
+ v.Op = OpAMD64VPERMI2W256
+ return true
+ case OpPermute2Int16x32:
+ v.Op = OpAMD64VPERMI2W512
+ return true
+ case OpPermute2Int16x8:
+ v.Op = OpAMD64VPERMI2W128
+ return true
+ case OpPermute2Int32x16:
+ v.Op = OpAMD64VPERMI2D512
+ return true
+ case OpPermute2Int32x4:
+ v.Op = OpAMD64VPERMI2D128
+ return true
+ case OpPermute2Int32x8:
+ v.Op = OpAMD64VPERMI2D256
+ return true
+ case OpPermute2Int64x2:
+ v.Op = OpAMD64VPERMI2Q128
+ return true
+ case OpPermute2Int64x4:
+ v.Op = OpAMD64VPERMI2Q256
+ return true
+ case OpPermute2Int64x8:
+ v.Op = OpAMD64VPERMI2Q512
+ return true
+ case OpPermute2Int8x16:
+ v.Op = OpAMD64VPERMI2B128
+ return true
+ case OpPermute2Int8x32:
+ v.Op = OpAMD64VPERMI2B256
+ return true
+ case OpPermute2Int8x64:
+ v.Op = OpAMD64VPERMI2B512
+ return true
+ case OpPermute2MaskedFloat32x16:
+ return rewriteValueAMD64_OpPermute2MaskedFloat32x16(v)
+ case OpPermute2MaskedFloat32x4:
+ return rewriteValueAMD64_OpPermute2MaskedFloat32x4(v)
+ case OpPermute2MaskedFloat32x8:
+ return rewriteValueAMD64_OpPermute2MaskedFloat32x8(v)
+ case OpPermute2MaskedFloat64x2:
+ return rewriteValueAMD64_OpPermute2MaskedFloat64x2(v)
+ case OpPermute2MaskedFloat64x4:
+ return rewriteValueAMD64_OpPermute2MaskedFloat64x4(v)
+ case OpPermute2MaskedFloat64x8:
+ return rewriteValueAMD64_OpPermute2MaskedFloat64x8(v)
+ case OpPermute2MaskedInt16x16:
+ return rewriteValueAMD64_OpPermute2MaskedInt16x16(v)
+ case OpPermute2MaskedInt16x32:
+ return rewriteValueAMD64_OpPermute2MaskedInt16x32(v)
+ case OpPermute2MaskedInt16x8:
+ return rewriteValueAMD64_OpPermute2MaskedInt16x8(v)
+ case OpPermute2MaskedInt32x16:
+ return rewriteValueAMD64_OpPermute2MaskedInt32x16(v)
+ case OpPermute2MaskedInt32x4:
+ return rewriteValueAMD64_OpPermute2MaskedInt32x4(v)
+ case OpPermute2MaskedInt32x8:
+ return rewriteValueAMD64_OpPermute2MaskedInt32x8(v)
+ case OpPermute2MaskedInt64x2:
+ return rewriteValueAMD64_OpPermute2MaskedInt64x2(v)
+ case OpPermute2MaskedInt64x4:
+ return rewriteValueAMD64_OpPermute2MaskedInt64x4(v)
+ case OpPermute2MaskedInt64x8:
+ return rewriteValueAMD64_OpPermute2MaskedInt64x8(v)
+ case OpPermute2MaskedInt8x16:
+ return rewriteValueAMD64_OpPermute2MaskedInt8x16(v)
+ case OpPermute2MaskedInt8x32:
+ return rewriteValueAMD64_OpPermute2MaskedInt8x32(v)
+ case OpPermute2MaskedInt8x64:
+ return rewriteValueAMD64_OpPermute2MaskedInt8x64(v)
+ case OpPermute2MaskedUint16x16:
+ return rewriteValueAMD64_OpPermute2MaskedUint16x16(v)
+ case OpPermute2MaskedUint16x32:
+ return rewriteValueAMD64_OpPermute2MaskedUint16x32(v)
+ case OpPermute2MaskedUint16x8:
+ return rewriteValueAMD64_OpPermute2MaskedUint16x8(v)
+ case OpPermute2MaskedUint32x16:
+ return rewriteValueAMD64_OpPermute2MaskedUint32x16(v)
+ case OpPermute2MaskedUint32x4:
+ return rewriteValueAMD64_OpPermute2MaskedUint32x4(v)
+ case OpPermute2MaskedUint32x8:
+ return rewriteValueAMD64_OpPermute2MaskedUint32x8(v)
+ case OpPermute2MaskedUint64x2:
+ return rewriteValueAMD64_OpPermute2MaskedUint64x2(v)
+ case OpPermute2MaskedUint64x4:
+ return rewriteValueAMD64_OpPermute2MaskedUint64x4(v)
+ case OpPermute2MaskedUint64x8:
+ return rewriteValueAMD64_OpPermute2MaskedUint64x8(v)
+ case OpPermute2MaskedUint8x16:
+ return rewriteValueAMD64_OpPermute2MaskedUint8x16(v)
+ case OpPermute2MaskedUint8x32:
+ return rewriteValueAMD64_OpPermute2MaskedUint8x32(v)
+ case OpPermute2MaskedUint8x64:
+ return rewriteValueAMD64_OpPermute2MaskedUint8x64(v)
+ case OpPermute2Uint16x16:
+ v.Op = OpAMD64VPERMI2W256
+ return true
+ case OpPermute2Uint16x32:
+ v.Op = OpAMD64VPERMI2W512
+ return true
+ case OpPermute2Uint16x8:
+ v.Op = OpAMD64VPERMI2W128
+ return true
+ case OpPermute2Uint32x16:
+ v.Op = OpAMD64VPERMI2D512
+ return true
+ case OpPermute2Uint32x4:
+ v.Op = OpAMD64VPERMI2D128
+ return true
+ case OpPermute2Uint32x8:
+ v.Op = OpAMD64VPERMI2D256
+ return true
+ case OpPermute2Uint64x2:
+ v.Op = OpAMD64VPERMI2Q128
+ return true
+ case OpPermute2Uint64x4:
+ v.Op = OpAMD64VPERMI2Q256
+ return true
+ case OpPermute2Uint64x8:
+ v.Op = OpAMD64VPERMI2Q512
+ return true
+ case OpPermute2Uint8x16:
+ v.Op = OpAMD64VPERMI2B128
+ return true
+ case OpPermute2Uint8x32:
+ v.Op = OpAMD64VPERMI2B256
+ return true
+ case OpPermute2Uint8x64:
+ v.Op = OpAMD64VPERMI2B512
+ return true
+ case OpPermuteFloat32x16:
+ v.Op = OpAMD64VPERMPS512
+ return true
+ case OpPermuteFloat32x8:
+ v.Op = OpAMD64VPERMPS256
+ return true
+ case OpPermuteFloat64x4:
+ v.Op = OpAMD64VPERMPD256
+ return true
+ case OpPermuteFloat64x8:
+ v.Op = OpAMD64VPERMPD512
+ return true
+ case OpPermuteInt16x16:
+ v.Op = OpAMD64VPERMW256
+ return true
+ case OpPermuteInt16x32:
+ v.Op = OpAMD64VPERMW512
+ return true
+ case OpPermuteInt16x8:
+ v.Op = OpAMD64VPERMW128
+ return true
+ case OpPermuteInt32x16:
+ v.Op = OpAMD64VPERMD512
+ return true
+ case OpPermuteInt32x8:
+ v.Op = OpAMD64VPERMD256
+ return true
+ case OpPermuteInt64x4:
+ v.Op = OpAMD64VPERMQ256
+ return true
+ case OpPermuteInt64x8:
+ v.Op = OpAMD64VPERMQ512
+ return true
+ case OpPermuteInt8x16:
+ v.Op = OpAMD64VPERMB128
+ return true
+ case OpPermuteInt8x32:
+ v.Op = OpAMD64VPERMB256
+ return true
+ case OpPermuteInt8x64:
+ v.Op = OpAMD64VPERMB512
+ return true
+ case OpPermuteMaskedFloat32x16:
+ return rewriteValueAMD64_OpPermuteMaskedFloat32x16(v)
+ case OpPermuteMaskedFloat32x8:
+ return rewriteValueAMD64_OpPermuteMaskedFloat32x8(v)
+ case OpPermuteMaskedFloat64x4:
+ return rewriteValueAMD64_OpPermuteMaskedFloat64x4(v)
+ case OpPermuteMaskedFloat64x8:
+ return rewriteValueAMD64_OpPermuteMaskedFloat64x8(v)
+ case OpPermuteMaskedInt16x16:
+ return rewriteValueAMD64_OpPermuteMaskedInt16x16(v)
+ case OpPermuteMaskedInt16x32:
+ return rewriteValueAMD64_OpPermuteMaskedInt16x32(v)
+ case OpPermuteMaskedInt16x8:
+ return rewriteValueAMD64_OpPermuteMaskedInt16x8(v)
+ case OpPermuteMaskedInt32x16:
+ return rewriteValueAMD64_OpPermuteMaskedInt32x16(v)
+ case OpPermuteMaskedInt32x8:
+ return rewriteValueAMD64_OpPermuteMaskedInt32x8(v)
+ case OpPermuteMaskedInt64x4:
+ return rewriteValueAMD64_OpPermuteMaskedInt64x4(v)
+ case OpPermuteMaskedInt64x8:
+ return rewriteValueAMD64_OpPermuteMaskedInt64x8(v)
+ case OpPermuteMaskedInt8x16:
+ return rewriteValueAMD64_OpPermuteMaskedInt8x16(v)
+ case OpPermuteMaskedInt8x32:
+ return rewriteValueAMD64_OpPermuteMaskedInt8x32(v)
+ case OpPermuteMaskedInt8x64:
+ return rewriteValueAMD64_OpPermuteMaskedInt8x64(v)
+ case OpPermuteMaskedUint16x16:
+ return rewriteValueAMD64_OpPermuteMaskedUint16x16(v)
+ case OpPermuteMaskedUint16x32:
+ return rewriteValueAMD64_OpPermuteMaskedUint16x32(v)
+ case OpPermuteMaskedUint16x8:
+ return rewriteValueAMD64_OpPermuteMaskedUint16x8(v)
+ case OpPermuteMaskedUint32x16:
+ return rewriteValueAMD64_OpPermuteMaskedUint32x16(v)
+ case OpPermuteMaskedUint32x8:
+ return rewriteValueAMD64_OpPermuteMaskedUint32x8(v)
+ case OpPermuteMaskedUint64x4:
+ return rewriteValueAMD64_OpPermuteMaskedUint64x4(v)
+ case OpPermuteMaskedUint64x8:
+ return rewriteValueAMD64_OpPermuteMaskedUint64x8(v)
+ case OpPermuteMaskedUint8x16:
+ return rewriteValueAMD64_OpPermuteMaskedUint8x16(v)
+ case OpPermuteMaskedUint8x32:
+ return rewriteValueAMD64_OpPermuteMaskedUint8x32(v)
+ case OpPermuteMaskedUint8x64:
+ return rewriteValueAMD64_OpPermuteMaskedUint8x64(v)
+ case OpPermuteUint16x16:
+ v.Op = OpAMD64VPERMW256
+ return true
+ case OpPermuteUint16x32:
+ v.Op = OpAMD64VPERMW512
+ return true
+ case OpPermuteUint16x8:
+ v.Op = OpAMD64VPERMW128
+ return true
+ case OpPermuteUint32x16:
+ v.Op = OpAMD64VPERMD512
+ return true
+ case OpPermuteUint32x8:
+ v.Op = OpAMD64VPERMD256
+ return true
+ case OpPermuteUint64x4:
+ v.Op = OpAMD64VPERMQ256
+ return true
+ case OpPermuteUint64x8:
+ v.Op = OpAMD64VPERMQ512
+ return true
+ case OpPermuteUint8x16:
+ v.Op = OpAMD64VPERMB128
+ return true
+ case OpPermuteUint8x32:
+ v.Op = OpAMD64VPERMB256
+ return true
+ case OpPermuteUint8x64:
+ v.Op = OpAMD64VPERMB512
+ return true
case OpPopCount16:
return rewriteValueAMD64_OpPopCount16(v)
case OpPopCount32:
}
return false
}
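
The split in the switch above reflects how these ops lower: each unmasked generic op maps 1:1 onto a machine op, so its case only swaps v.Op, while every masked op dispatches to a helper because the vector-typed mask argument must first be converted to a K register. The helpers' match/result comments correspond to rewrite rules of roughly this shape (a sketch in the .rules notation; the actual rule text lives in the generator, not in this CL):

(Permute2Int8x16 ...) => (VPERMI2B128 ...)
(Permute2MaskedInt8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
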
+func rewriteValueAMD64_OpPermute2MaskedFloat32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedFloat32x16 x y z mask)
+ // result: (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2PSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedFloat32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedFloat32x4 x y z mask)
+ // result: (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2PSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedFloat32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedFloat32x8 x y z mask)
+ // result: (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2PSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedFloat64x2(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedFloat64x2 x y z mask)
+ // result: (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2PDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedFloat64x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedFloat64x4 x y z mask)
+ // result: (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2PDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedFloat64x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedFloat64x8 x y z mask)
+ // result: (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2PDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt16x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt16x16 x y z mask)
+ // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2WMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt16x32(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt16x32 x y z mask)
+ // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2WMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt16x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt16x8 x y z mask)
+ // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2WMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt32x16 x y z mask)
+ // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2DMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt32x4 x y z mask)
+ // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2DMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt32x8 x y z mask)
+ // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2DMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt64x2(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt64x2 x y z mask)
+ // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2QMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt64x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt64x4 x y z mask)
+ // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2QMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt64x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt64x8 x y z mask)
+ // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2QMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt8x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt8x16 x y z mask)
+ // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2BMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt8x32(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt8x32 x y z mask)
+ // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2BMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedInt8x64(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedInt8x64 x y z mask)
+ // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2BMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint16x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint16x16 x y z mask)
+ // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2WMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint16x32(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint16x32 x y z mask)
+ // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2WMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint16x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint16x8 x y z mask)
+ // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2WMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint32x16 x y z mask)
+ // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2DMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint32x4 x y z mask)
+ // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2DMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint32x8 x y z mask)
+ // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2DMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint64x2(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint64x2 x y z mask)
+ // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2QMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint64x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint64x4 x y z mask)
+ // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2QMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint64x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint64x8 x y z mask)
+ // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2QMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint8x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint8x16 x y z mask)
+ // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2BMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint8x32(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint8x32 x y z mask)
+ // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2BMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermute2MaskedUint8x64(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Permute2MaskedUint8x64 x y z mask)
+ // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPERMI2BMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedFloat32x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedFloat32x16 x y mask)
+ // result: (VPERMPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMPSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedFloat32x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedFloat32x8 x y mask)
+ // result: (VPERMPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMPSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedFloat64x4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedFloat64x4 x y mask)
+ // result: (VPERMPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMPDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedFloat64x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedFloat64x8 x y mask)
+ // result: (VPERMPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMPDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt16x16 x y mask)
+ // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt16x32 x y mask)
+ // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt16x8 x y mask)
+ // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt32x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt32x16 x y mask)
+ // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt32x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt32x8 x y mask)
+ // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt64x4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt64x4 x y mask)
+ // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt64x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt64x8 x y mask)
+ // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt8x16 x y mask)
+ // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt8x32 x y mask)
+ // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedInt8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedInt8x64 x y mask)
+ // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint16x16 x y mask)
+ // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint16x32 x y mask)
+ // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint16x8 x y mask)
+ // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint32x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint32x16 x y mask)
+ // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint32x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint32x8 x y mask)
+ // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint64x4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint64x4 x y mask)
+ // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint64x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint64x8 x y mask)
+ // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint8x16 x y mask)
+ // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint8x32 x y mask)
+ // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpPermuteMaskedUint8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (PermuteMaskedUint8x64 x y mask)
+ // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPERMBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
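
Each generated function above follows the same lowering shape: the generic masked op is replaced by its AVX-512 machine op, with the mask operand converted to a K register by the matching VPMOVVec*ToM op. In the .rules notation that the match/result comments mirror, every function here corresponds to one rule of roughly the form:

(PermuteMaskedInt16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
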
func rewriteValueAMD64_OpPopCount16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
}
}
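+// opLen2_21 is like opLen2, but swaps the two arguments: the SSA op
+// receives (args[1], args[0]).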
+func opLen2_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue2(op, t, args[1], args[0])
+ }
+}
+
func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(op, t, args[0], args[1], args[2])
}
}
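+// opLen3_21 is like opLen3, but swaps the first two arguments; the
+// third stays in place.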
+func opLen3_21(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue3(op, t, args[1], args[0], args[2])
+ }
+}
+
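+// opLen3_231 is like opLen3, but rotates the arguments: the SSA op
+// receives (args[2], args[0], args[1]), so the last Go argument comes
+// first.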
+func opLen3_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue3(op, t, args[2], args[0], args[1])
+ }
+}
+
func opLen4(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue4(op, t, args[0], args[1], args[2], args[3])
}
}
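+// opLen4_231 applies the same rotation as opLen3_231 to the first three
+// arguments and keeps the fourth (the mask) last: the SSA op receives
+// (args[2], args[0], args[1], args[3]).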
+func opLen4_231(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue4(op, t, args[2], args[0], args[1], args[3])
+ }
+}
+
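Read together with the rewrite rules earlier in this CL, these reordering helpers fix the operand order end to end. An illustrative trace for one masked intrinsic (editorial sketch; the names are taken from the generated code in this CL):

	// Go source: x.Permute2Masked(y, indices, mask) on Uint8x32,
	// so args = [x, y, indices, mask].
	// opLen4_231 builds:  (Permute2MaskedUint8x32 indices x y mask)
	// the rewrite yields: (VPERMI2BMasked256 indices x y (VPMOVVec8x32ToM mask))
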
func plainPanicSimdImm(s *state) {
cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL])
cmp.AuxInt = 0
addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x32.Permute", opLen2_21(ssa.OpPermuteUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.Permute", opLen2_21(ssa.OpPermuteInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x64.Permute", opLen2_21(ssa.OpPermuteUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Permute", opLen2_21(ssa.OpPermuteInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Permute", opLen2_21(ssa.OpPermuteUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.Permute", opLen2_21(ssa.OpPermuteInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.Permute", opLen2_21(ssa.OpPermuteUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Permute", opLen2_21(ssa.OpPermuteInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x32.Permute", opLen2_21(ssa.OpPermuteUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x8.Permute", opLen2_21(ssa.OpPermuteFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.Permute", opLen2_21(ssa.OpPermuteInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.Permute", opLen2_21(ssa.OpPermuteUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.Permute", opLen2_21(ssa.OpPermuteFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.Permute", opLen2_21(ssa.OpPermuteInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x16.Permute", opLen2_21(ssa.OpPermuteUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x4.Permute", opLen2_21(ssa.OpPermuteFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x4.Permute", opLen2_21(ssa.OpPermuteInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x4.Permute", opLen2_21(ssa.OpPermuteUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.Permute", opLen2_21(ssa.OpPermuteFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x8.Permute", opLen2_21(ssa.OpPermuteInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x8.Permute", opLen2_21(ssa.OpPermuteUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Permute2", opLen3_231(ssa.OpPermute2Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Permute2", opLen3_231(ssa.OpPermute2Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.Permute2", opLen3_231(ssa.OpPermute2Int8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x32.Permute2", opLen3_231(ssa.OpPermute2Uint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.Permute2", opLen3_231(ssa.OpPermute2Int8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x64.Permute2", opLen3_231(ssa.OpPermute2Uint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Permute2", opLen3_231(ssa.OpPermute2Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Permute2", opLen3_231(ssa.OpPermute2Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.Permute2", opLen3_231(ssa.OpPermute2Int16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.Permute2", opLen3_231(ssa.OpPermute2Uint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Permute2", opLen3_231(ssa.OpPermute2Int16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x32.Permute2", opLen3_231(ssa.OpPermute2Uint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Permute2", opLen3_231(ssa.OpPermute2Float32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.Permute2", opLen3_231(ssa.OpPermute2Int32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Permute2", opLen3_231(ssa.OpPermute2Uint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.Permute2", opLen3_231(ssa.OpPermute2Float32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.Permute2", opLen3_231(ssa.OpPermute2Int32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.Permute2", opLen3_231(ssa.OpPermute2Uint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.Permute2", opLen3_231(ssa.OpPermute2Float32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.Permute2", opLen3_231(ssa.OpPermute2Int32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x16.Permute2", opLen3_231(ssa.OpPermute2Uint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Permute2", opLen3_231(ssa.OpPermute2Float64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Permute2", opLen3_231(ssa.OpPermute2Int64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Permute2", opLen3_231(ssa.OpPermute2Uint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.Permute2", opLen3_231(ssa.OpPermute2Float64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x4.Permute2", opLen3_231(ssa.OpPermute2Int64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x4.Permute2", opLen3_231(ssa.OpPermute2Uint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.Permute2", opLen3_231(ssa.OpPermute2Float64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x8.Permute2", opLen3_231(ssa.OpPermute2Int64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x8.Permute2", opLen3_231(ssa.OpPermute2Uint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.PopCount", opLen1(ssa.OpPopCountInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.PopCount", opLen1(ssa.OpPopCountInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.PopCount", opLen1(ssa.OpPopCountInt8x64, types.TypeVec512), sys.AMD64)
// Asm: VPHSUBD, CPU Feature: AVX2
func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8
+/* Permute */
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x16) Permute(indices Uint8x16) Int8x16
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x16) Permute(indices Uint8x16) Uint8x16
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x32) Permute(indices Uint8x32) Int8x32
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x32) Permute(indices Uint8x32) Uint8x32
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x64) Permute(indices Uint8x64) Int8x64
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x64) Permute(indices Uint8x64) Uint8x64
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Int16x8) Permute(indices Uint16x8) Int16x8
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Uint16x8) Permute(indices Uint16x8) Uint16x8
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Int16x16) Permute(indices Uint16x16) Int16x16
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Uint16x16) Permute(indices Uint16x16) Uint16x16
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Int16x32) Permute(indices Uint16x32) Int16x32
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Uint16x32) Permute(indices Uint16x32) Uint16x32
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMPS, CPU Feature: AVX2
+func (x Float32x8) Permute(indices Uint32x8) Float32x8
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMD, CPU Feature: AVX2
+func (x Int32x8) Permute(indices Uint32x8) Int32x8
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMD, CPU Feature: AVX2
+func (x Uint32x8) Permute(indices Uint32x8) Uint32x8
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMPS, CPU Feature: AVX512F
+func (x Float32x16) Permute(indices Uint32x16) Float32x16
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Int32x16) Permute(indices Uint32x16) Int32x16
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Uint32x16) Permute(indices Uint32x16) Uint32x16
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMPD, CPU Feature: AVX512F
+func (x Float64x4) Permute(indices Uint64x4) Float64x4
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Int64x4) Permute(indices Uint64x4) Int64x4
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Uint64x4) Permute(indices Uint64x4) Uint64x4
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMPD, CPU Feature: AVX512F
+func (x Float64x8) Permute(indices Uint64x8) Float64x8
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Int64x8) Permute(indices Uint64x8) Int64x8
+
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
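
A usage sketch (editorial, not part of this CL): it assumes the experimental simd package's slice-based loaders and stores (LoadInt32x8Slice, LoadUint32x8Slice, StoreSlice); the exact constructor names may differ.

	out := make([]int32, 8)
	x := simd.LoadInt32x8Slice([]int32{0, 1, 2, 3, 4, 5, 6, 7})
	idx := simd.LoadUint32x8Slice([]uint32{7, 6, 5, 4, 3, 2, 1, 0})
	x.Permute(idx).StoreSlice(out) // out is now [7 6 5 4 3 2 1 0]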
+
+/* Permute2 */
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Int8x16) Permute2(y Int8x16, indices Uint8x16) Int8x16
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Uint8x16) Permute2(y Uint8x16, indices Uint8x16) Uint8x16
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Int8x32) Permute2(y Int8x32, indices Uint8x32) Int8x32
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Uint8x32) Permute2(y Uint8x32, indices Uint8x32) Uint8x32
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Int8x64) Permute2(y Int8x64, indices Uint8x64) Int8x64
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Uint8x64) Permute2(y Uint8x64, indices Uint8x64) Uint8x64
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Int16x8) Permute2(y Int16x8, indices Uint16x8) Int16x8
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Uint16x8) Permute2(y Uint16x8, indices Uint16x8) Uint16x8
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Int16x16) Permute2(y Int16x16, indices Uint16x16) Int16x16
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Uint16x16) Permute2(y Uint16x16, indices Uint16x16) Uint16x16
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Int16x32) Permute2(y Int16x32, indices Uint16x32) Int16x32
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Uint16x32) Permute2(y Uint16x32, indices Uint16x32) Uint16x32
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PS, CPU Feature: AVX512F
+func (x Float32x4) Permute2(y Float32x4, indices Uint32x4) Float32x4
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Int32x4) Permute2(y Int32x4, indices Uint32x4) Int32x4
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Uint32x4) Permute2(y Uint32x4, indices Uint32x4) Uint32x4
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PS, CPU Feature: AVX512F
+func (x Float32x8) Permute2(y Float32x8, indices Uint32x8) Float32x8
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Int32x8) Permute2(y Int32x8, indices Uint32x8) Int32x8
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Uint32x8) Permute2(y Uint32x8, indices Uint32x8) Uint32x8
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PS, CPU Feature: AVX512F
+func (x Float32x16) Permute2(y Float32x16, indices Uint32x16) Float32x16
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Int32x16) Permute2(y Int32x16, indices Uint32x16) Int32x16
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Uint32x16) Permute2(y Uint32x16, indices Uint32x16) Uint32x16
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PD, CPU Feature: AVX512F
+func (x Float64x2) Permute2(y Float64x2, indices Uint64x2) Float64x2
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Int64x2) Permute2(y Int64x2, indices Uint64x2) Int64x2
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Uint64x2) Permute2(y Uint64x2, indices Uint64x2) Uint64x2
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PD, CPU Feature: AVX512F
+func (x Float64x4) Permute2(y Float64x4, indices Uint64x4) Float64x4
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Int64x4) Permute2(y Int64x4, indices Uint64x4) Int64x4
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Uint64x4) Permute2(y Uint64x4, indices Uint64x4) Uint64x4
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PD, CPU Feature: AVX512F
+func (x Float64x8) Permute2(y Float64x8, indices Uint64x8) Float64x8
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8
+
+// Permute2 performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8
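
Permute2 is concatenate-then-select: with four 32-bit lanes per input, indices 0-3 address x and 4-7 address y. An editorial sketch (same assumed loaders as above):

	x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	y := simd.LoadInt32x4Slice([]int32{5, 6, 7, 8})
	idx := simd.LoadUint32x4Slice([]uint32{0, 4, 1, 5})
	lo := x.Permute2(y, idx) // lanes of lo: [1 5 2 6]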
+
+/* Permute2Masked */
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, u Mask8x16) Int8x16
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, u Mask8x16) Uint8x16
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, u Mask8x32) Int8x32
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, u Mask8x32) Uint8x32
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, u Mask8x64) Int8x64
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2B, CPU Feature: AVX512VBMI
+func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, u Mask8x64) Uint8x64
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, u Mask16x8) Int16x8
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, u Mask16x8) Uint16x8
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, u Mask16x16) Int16x16
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, u Mask16x16) Uint16x16
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, u Mask16x32) Int16x32
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2W, CPU Feature: AVX512BW
+func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, u Mask16x32) Uint16x32
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PS, CPU Feature: AVX512F
+func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, u Mask32x4) Float32x4
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, u Mask32x4) Int32x4
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, u Mask32x4) Uint32x4
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PS, CPU Feature: AVX512F
+func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, u Mask32x8) Float32x8
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, u Mask32x8) Int32x8
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, u Mask32x8) Uint32x8
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PS, CPU Feature: AVX512F
+func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, u Mask32x16) Float32x16
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, u Mask32x16) Int32x16
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2D, CPU Feature: AVX512F
+func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, u Mask32x16) Uint32x16
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PD, CPU Feature: AVX512F
+func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, u Mask64x2) Float64x2
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, u Mask64x2) Int64x2
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, u Mask64x2) Uint64x2
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PD, CPU Feature: AVX512F
+func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, u Mask64x4) Float64x4
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, u Mask64x4) Int64x4
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, u Mask64x4) Uint64x4
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2PD, CPU Feature: AVX512F
+func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, u Mask64x8) Float64x8
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, u Mask64x8) Int64x8
+
+// Permute2Masked performs a full permutation of vectors x and y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is the concatenation of x and y.
+// Only the needed bits to represent xy's index are used in indices' elements.
+//
+// Asm: VPERMI2Q, CPU Feature: AVX512F
+func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, u Mask64x8) Uint64x8
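
Permute2Masked is the same selection under a per-lane mask. A minimal call-shape sketch using only the API declared in this CL (the treatment of inactive lanes is left to the masked instruction's semantics):

	func interleaveLoMasked(x, y simd.Int32x4, idx simd.Uint32x4, m simd.Mask32x4) simd.Int32x4 {
		return x.Permute2Masked(y, idx, m)
	}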
+
+/* PermuteMasked */
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Int8x16
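
(An editorial call-shape sketch, using only the API declared in this CL: a masked permute is Permute plus a mask argument, lowered to the VPERM*Masked ops by the rewrite rules earlier in this CL.)

	func permuteMasked(x simd.Int8x16, idx simd.Uint8x16, m simd.Mask8x16) simd.Int8x16 {
		return x.PermuteMasked(idx, m)
	}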
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x16) PermuteMasked(indices Uint8x16, z Mask8x16) Uint8x16
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Int8x32
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x32) PermuteMasked(indices Uint8x32, z Mask8x32) Uint8x32
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Int8x64
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x64) PermuteMasked(indices Uint8x64, z Mask8x64) Uint8x64
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Int16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Int16x8
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Uint16x8) PermuteMasked(indices Uint16x8, z Mask16x8) Uint16x8
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Int16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Int16x16
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Uint16x16) PermuteMasked(indices Uint16x16, z Mask16x16) Uint16x16
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Int16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Int16x32
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
+//
+// Asm: VPERMW, CPU Feature: AVX512BW
+func (x Uint16x32) PermuteMasked(indices Uint16x32, z Mask16x32) Uint16x32
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMPS, CPU Feature: AVX512F
+func (x Float32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Float32x8
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Int32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Int32x8
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Uint32x8) PermuteMasked(indices Uint32x8, z Mask32x8) Uint32x8
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMPS, CPU Feature: AVX512F
+func (x Float32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Float32x16
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Int32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Int32x16
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Uint32x16) PermuteMasked(indices Uint32x16, z Mask32x16) Uint32x16
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMPD, CPU Feature: AVX512F
+func (x Float64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Float64x4
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Int64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Int64x4
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Uint64x4) PermuteMasked(indices Uint64x4, z Mask64x4) Uint64x4
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMPD, CPU Feature: AVX512F
+func (x Float64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Float64x8
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Int64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Int64x8
+
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the low bits of each indices element needed to index into x are used.
+//
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Uint64x8) PermuteMasked(indices Uint64x8, z Mask64x8) Uint64x8
+
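As a usage reference (not part of this CL), a minimal sketch that rotates an
Int64x8 left by one lane with the unmasked Permute; it uses only the
LoadInt64x8Slice, LoadUint64x8Slice, Permute, and StoreSlice APIs exercised by
the tests in this CL, and assumes x is an []int64 of length 8:

	rot := []uint64{1, 2, 3, 4, 5, 6, 7, 0} // result[i] = x[(i+1)%8]
	got := make([]int64, 8)
	simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(rot)).StoreSlice(got)
	// Per the doc comments above, only the bits needed to index the 8
	// lanes are used, so an index of 9 selects the same lane as 1.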
/* PopCount */
// PopCount counts the number of set bits in each element.
testInt32x4BinaryMasked(t, []int32{1, 2, 3, 4}, []int32{5, 6, 7, 8}, []int32{-1, -1, 0, 0}, []int32{6, 8, 0, 0}, "AddMasked")
}
+func TestPermute(t *testing.T) {
+	if !simd.HasAVX512() {
+		t.Skip("Test requires HasAVX512, not available on this hardware")
+	}
+ x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
+ indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
+ want := []int64{8, 7, 6, 5, 4, 3, 2, 1}
+ got := make([]int64, 8)
+ simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(indices)).StoreSlice(got)
+ for i := range 8 {
+ if want[i] != got[i] {
+ t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
+ }
+ }
+}
+
+func TestPermute2(t *testing.T) {
+	if !simd.HasAVX512() {
+		t.Skip("Test requires HasAVX512, not available on this hardware")
+	}
+ x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
+ y := []int64{-1, -2, -3, -4, -5, -6, -7, -8}
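+	// Index values 8..15 select from y; values 0..7 select from x.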
+ indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
+ want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
+ got := make([]int64, 8)
+ simd.LoadInt64x8Slice(x).Permute2(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got)
+ for i := range 8 {
+ if want[i] != got[i] {
+ t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
+ }
+ }
+}
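As a semantics cross-check (not part of this CL), a hypothetical scalar
reference model for the 8-lane Permute2 used above: lane i of the result is
read from the 16-element concatenation of x and y, so index values 8..15
select from y, matching the want slice in TestPermute2.

	// permute2Ref mirrors the concatenate-then-index semantics of Permute2.
	// Only the low 4 bits of each index are used for 8-lane inputs.
	func permute2Ref(x, y []int64, indices []uint64) []int64 {
		xy := append(append([]int64{}, x...), y...) // x occupies 0..7, y occupies 8..15
		out := make([]int64, len(indices))
		for i, idx := range indices {
			out[i] = xy[idx&15]
		}
		return out
	}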
+
// checkInt8Slices ensures that b and a are equal, to the end of b.
// also serves to use the slices, to prevent accidental optimization.
func checkInt8Slices(t *testing.T, a, b []int8) {
// GaloisFieldAffineTransformMasked
// Get128
// GetElem
+// Permute
+// Permute2
+// Permute2Masked
+// PermuteMasked
// RotateAllLeft
// RotateAllLeftMasked
// RotateAllRight