This CL is generated by CL 680215.
Change-Id: Ie085e65e0473a8e96170702d7265d379ec8812ba
Reviewed-on: https://go-review.googlesource.com/c/go/+/681298
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
ssa.OpAMD64VPCMPBMasked512:
p = simdFp2k1k1Imm8(s, v)
+ case ssa.OpAMD64VPDPWSSD128,
+ ssa.OpAMD64VPDPWSSD256,
+ ssa.OpAMD64VPDPWSSD512,
+ ssa.OpAMD64VPDPWSSDS128,
+ ssa.OpAMD64VPDPWSSDS256,
+ ssa.OpAMD64VPDPWSSDS512,
+ ssa.OpAMD64VPDPBUSDS128,
+ ssa.OpAMD64VPDPBUSDS256,
+ ssa.OpAMD64VPDPBUSDS512,
+ ssa.OpAMD64VPDPBUSD128,
+ ssa.OpAMD64VPDPBUSD256,
+ ssa.OpAMD64VPDPBUSD512:
+ p = simdFp31ResultInArg0(s, v)
+
+ case ssa.OpAMD64VPDPWSSDMasked512,
+ ssa.OpAMD64VPDPWSSDMasked128,
+ ssa.OpAMD64VPDPWSSDMasked256,
+ ssa.OpAMD64VPDPWSSDSMasked512,
+ ssa.OpAMD64VPDPWSSDSMasked128,
+ ssa.OpAMD64VPDPWSSDSMasked256,
+ ssa.OpAMD64VPDPBUSDSMasked512,
+ ssa.OpAMD64VPDPBUSDSMasked128,
+ ssa.OpAMD64VPDPBUSDSMasked256,
+ ssa.OpAMD64VPDPBUSDMasked512,
+ ssa.OpAMD64VPDPBUSDMasked128,
+ ssa.OpAMD64VPDPBUSDMasked256:
+ p = simdFp3k1fp1ResultInArg0(s, v)
+
default:
// Unknown reg shape
return false
ssa.OpAMD64VPMADDWDMasked256,
ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPMADDWDMasked128,
+ ssa.OpAMD64VPDPWSSDMasked512,
+ ssa.OpAMD64VPDPWSSDMasked128,
+ ssa.OpAMD64VPDPWSSDMasked256,
ssa.OpAMD64VPOPCNTWMasked256,
ssa.OpAMD64VPOPCNTWMasked512,
ssa.OpAMD64VPOPCNTWMasked128,
ssa.OpAMD64VPADDSBMasked128,
ssa.OpAMD64VPADDSBMasked256,
ssa.OpAMD64VPADDSBMasked512,
+ ssa.OpAMD64VPDPWSSDSMasked512,
+ ssa.OpAMD64VPDPWSSDSMasked128,
+ ssa.OpAMD64VPDPWSSDSMasked256,
ssa.OpAMD64VPSUBSWMasked256,
ssa.OpAMD64VPSUBSWMasked512,
ssa.OpAMD64VPSUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
+ ssa.OpAMD64VPDPBUSDSMasked512,
+ ssa.OpAMD64VPDPBUSDSMasked128,
+ ssa.OpAMD64VPDPBUSDSMasked256,
ssa.OpAMD64VSQRTPSMasked512,
ssa.OpAMD64VSQRTPSMasked128,
ssa.OpAMD64VSQRTPSMasked256,
ssa.OpAMD64VPSUBBMasked128,
ssa.OpAMD64VPSUBBMasked256,
ssa.OpAMD64VPSUBBMasked512,
+ ssa.OpAMD64VPDPBUSDMasked512,
+ ssa.OpAMD64VPDPBUSDMasked128,
+ ssa.OpAMD64VPDPBUSDMasked256,
ssa.OpAMD64VXORPSMasked512,
ssa.OpAMD64VXORPSMasked128,
ssa.OpAMD64VXORPSMasked256,
(MaskedPairDotProdInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedPairDotProdInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedPairDotProdInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(MaskedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedSaturatedAddUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(MaskedSaturatedAddUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(MaskedSaturatedAddUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedSaturatedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedSaturatedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(MaskedSaturatedSubInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedSaturatedSubInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedSaturatedSubInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(MaskedTruncWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(MaskedTruncWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(MaskedTruncWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(MaskedXorFloat32x16 x y mask) => (VXORPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(MaskedXorFloat32x4 x y mask) => (VXORPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(MaskedXorFloat32x8 x y mask) => (VXORPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(PairDotProdInt16x16 ...) => (VPMADDWD256 ...)
(PairDotProdInt16x32 ...) => (VPMADDWD512 ...)
(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
+(PairDotProdAccumulateInt32x16 ...) => (VPDPWSSD512 ...)
+(PairDotProdAccumulateInt32x4 ...) => (VPDPWSSD128 ...)
+(PairDotProdAccumulateInt32x8 ...) => (VPDPWSSD256 ...)
(PairwiseAddFloat32x4 ...) => (VHADDPS128 ...)
(PairwiseAddFloat32x8 ...) => (VHADDPS256 ...)
(PairwiseAddFloat64x2 ...) => (VHADDPD128 ...)
(SaturatedAddUint8x16 ...) => (VPADDSB128 ...)
(SaturatedAddUint8x32 ...) => (VPADDSB256 ...)
(SaturatedAddUint8x64 ...) => (VPADDSB512 ...)
+(SaturatedPairDotProdAccumulateInt32x16 ...) => (VPDPWSSDS512 ...)
+(SaturatedPairDotProdAccumulateInt32x4 ...) => (VPDPWSSDS128 ...)
+(SaturatedPairDotProdAccumulateInt32x8 ...) => (VPDPWSSDS256 ...)
(SaturatedPairwiseAddInt16x16 ...) => (VPHADDSW256 ...)
(SaturatedPairwiseAddInt16x8 ...) => (VPHADDSW128 ...)
(SaturatedPairwiseSubInt16x16 ...) => (VPHSUBSW256 ...)
(SaturatedUnsignedSignedPairDotProdUint16x8 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
+(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
(SignInt16x16 ...) => (VPSIGNW256 ...)
(SignInt16x8 ...) => (VPSIGNW128 ...)
(SignInt32x4 ...) => (VPSIGND128 ...)
(TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
(TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
(TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
+(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
+(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
+(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
+(UnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSD512 ...)
+(UnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSD128 ...)
+(UnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSD256 ...)
(XorFloat32x16 ...) => (VXORPS512 ...)
(XorFloat32x4 ...) => (VXORPS128 ...)
(XorFloat32x8 ...) => (VXORPS256 ...)
{name: "VPMINSDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPORDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPDPWSSDMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPOPCNTDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPDPWSSDSMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPDPBUSDSMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPDPBUSDMasked512", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPXORDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSD512", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSD512", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLD512", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPORD512", argLength: 2, reg: fp21, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPDPWSSD512", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPOPCNTD512", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPDPWSSDS512", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPDPBUSDS512", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPSUBD512", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPDPBUSD512", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPXORD512", argLength: 2, reg: fp21, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPABSD128", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPADDD128", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINSDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPORDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPORD", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPDPWSSDMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPOPCNTDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPDPWSSDSMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPDPBUSDSMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSUBDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPDPBUSDMasked128", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPXORDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPXORD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXSD128", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINSD128", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULDQ128", argLength: 2, reg: fp21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLD128", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPDPWSSD128", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPHADDD128", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHSUBD128", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPOPCNTD128", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPDPWSSDS128", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPDPBUSDS128", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSIGND128", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBD128", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPDPBUSD128", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPABSD256", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPADDD256", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCMPEQD256", argLength: 2, reg: fp21, asm: "VPCMPEQD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINSDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPORDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPORD", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPDPWSSDMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPOPCNTDMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPDPWSSDSMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPDPBUSDSMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSUBDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPDPBUSDMasked256", argLength: 4, reg: fp3k1fp1, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPXORDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPXORD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXSD256", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINSD256", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULDQ256", argLength: 2, reg: fp21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLD256", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPDPWSSD256", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPHADDD256", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHSUBD256", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPOPCNTD256", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPDPWSSDS256", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPDPBUSDS256", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPSIGND256", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBD256", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPDPBUSD256", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPABSQ128", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPADDQ128", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQQ128", argLength: 2, reg: fp21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "MaskedMulLowInt32x16", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt32x16", argLength: 3, commutative: true},
{name: "MaskedOrInt32x16", argLength: 3, commutative: true},
+ {name: "MaskedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false},
{name: "MaskedPopCountInt32x16", argLength: 2, commutative: false},
+ {name: "MaskedSaturatedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false},
{name: "MaskedSubInt32x16", argLength: 3, commutative: false},
+ {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false},
{name: "MaskedXorInt32x16", argLength: 3, commutative: true},
{name: "MaxInt32x16", argLength: 2, commutative: true},
{name: "MinInt32x16", argLength: 2, commutative: true},
{name: "MulLowInt32x16", argLength: 2, commutative: true},
{name: "NotEqualInt32x16", argLength: 2, commutative: true},
{name: "OrInt32x16", argLength: 2, commutative: true},
+ {name: "PairDotProdAccumulateInt32x16", argLength: 3, commutative: false},
{name: "PopCountInt32x16", argLength: 1, commutative: false},
+ {name: "SaturatedPairDotProdAccumulateInt32x16", argLength: 3, commutative: false},
+ {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
{name: "SubInt32x16", argLength: 2, commutative: false},
+ {name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
{name: "XorInt32x16", argLength: 2, commutative: true},
{name: "AbsoluteInt32x4", argLength: 1, commutative: false},
{name: "AddInt32x4", argLength: 2, commutative: true},
{name: "MaskedMulLowInt32x4", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt32x4", argLength: 3, commutative: true},
{name: "MaskedOrInt32x4", argLength: 3, commutative: true},
+ {name: "MaskedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false},
{name: "MaskedPopCountInt32x4", argLength: 2, commutative: false},
+ {name: "MaskedSaturatedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false},
{name: "MaskedSubInt32x4", argLength: 3, commutative: false},
+ {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false},
{name: "MaskedXorInt32x4", argLength: 3, commutative: true},
{name: "MaxInt32x4", argLength: 2, commutative: true},
{name: "MinInt32x4", argLength: 2, commutative: true},
{name: "MulLowInt32x4", argLength: 2, commutative: true},
{name: "NotEqualInt32x4", argLength: 2, commutative: true},
{name: "OrInt32x4", argLength: 2, commutative: true},
+ {name: "PairDotProdAccumulateInt32x4", argLength: 3, commutative: false},
{name: "PairwiseAddInt32x4", argLength: 2, commutative: false},
{name: "PairwiseSubInt32x4", argLength: 2, commutative: false},
{name: "PopCountInt32x4", argLength: 1, commutative: false},
+ {name: "SaturatedPairDotProdAccumulateInt32x4", argLength: 3, commutative: false},
+ {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
{name: "SignInt32x4", argLength: 2, commutative: false},
{name: "SubInt32x4", argLength: 2, commutative: false},
+ {name: "UnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
{name: "XorInt32x4", argLength: 2, commutative: true},
{name: "AbsoluteInt32x8", argLength: 1, commutative: false},
{name: "AddInt32x8", argLength: 2, commutative: true},
{name: "MaskedMulLowInt32x8", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt32x8", argLength: 3, commutative: true},
{name: "MaskedOrInt32x8", argLength: 3, commutative: true},
+ {name: "MaskedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false},
{name: "MaskedPopCountInt32x8", argLength: 2, commutative: false},
+ {name: "MaskedSaturatedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false},
{name: "MaskedSubInt32x8", argLength: 3, commutative: false},
+ {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false},
{name: "MaskedXorInt32x8", argLength: 3, commutative: true},
{name: "MaxInt32x8", argLength: 2, commutative: true},
{name: "MinInt32x8", argLength: 2, commutative: true},
{name: "MulLowInt32x8", argLength: 2, commutative: true},
{name: "NotEqualInt32x8", argLength: 2, commutative: true},
{name: "OrInt32x8", argLength: 2, commutative: true},
+ {name: "PairDotProdAccumulateInt32x8", argLength: 3, commutative: false},
{name: "PairwiseAddInt32x8", argLength: 2, commutative: false},
{name: "PairwiseSubInt32x8", argLength: 2, commutative: false},
{name: "PopCountInt32x8", argLength: 1, commutative: false},
+ {name: "SaturatedPairDotProdAccumulateInt32x8", argLength: 3, commutative: false},
+ {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
{name: "SignInt32x8", argLength: 2, commutative: false},
{name: "SubInt32x8", argLength: 2, commutative: false},
+ {name: "UnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
{name: "XorInt32x8", argLength: 2, commutative: true},
{name: "AbsoluteInt64x2", argLength: 1, commutative: false},
{name: "AddInt64x2", argLength: 2, commutative: true},
{name: "MaskedNotEqualUint32x16", argLength: 3, commutative: true},
{name: "MaskedOrUint32x16", argLength: 3, commutative: true},
{name: "MaskedPopCountUint32x16", argLength: 2, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false},
{name: "MaskedSubUint32x16", argLength: 3, commutative: false},
+ {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false},
{name: "MaskedXorUint32x16", argLength: 3, commutative: true},
{name: "MaxUint32x16", argLength: 2, commutative: true},
{name: "MinUint32x16", argLength: 2, commutative: true},
{name: "NotEqualUint32x16", argLength: 2, commutative: true},
{name: "OrUint32x16", argLength: 2, commutative: true},
{name: "PopCountUint32x16", argLength: 1, commutative: false},
+ {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
{name: "SubUint32x16", argLength: 2, commutative: false},
+ {name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
{name: "XorUint32x16", argLength: 2, commutative: true},
{name: "AddUint32x4", argLength: 2, commutative: true},
{name: "AndUint32x4", argLength: 2, commutative: true},
{name: "MaskedNotEqualUint32x4", argLength: 3, commutative: true},
{name: "MaskedOrUint32x4", argLength: 3, commutative: true},
{name: "MaskedPopCountUint32x4", argLength: 2, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false},
{name: "MaskedSubUint32x4", argLength: 3, commutative: false},
+ {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false},
{name: "MaskedXorUint32x4", argLength: 3, commutative: true},
{name: "MaxUint32x4", argLength: 2, commutative: true},
{name: "MinUint32x4", argLength: 2, commutative: true},
{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
{name: "PopCountUint32x4", argLength: 1, commutative: false},
+ {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
{name: "SubUint32x4", argLength: 2, commutative: false},
+ {name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
{name: "XorUint32x4", argLength: 2, commutative: true},
{name: "AddUint32x8", argLength: 2, commutative: true},
{name: "AndUint32x8", argLength: 2, commutative: true},
{name: "MaskedNotEqualUint32x8", argLength: 3, commutative: true},
{name: "MaskedOrUint32x8", argLength: 3, commutative: true},
{name: "MaskedPopCountUint32x8", argLength: 2, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false},
{name: "MaskedSubUint32x8", argLength: 3, commutative: false},
+ {name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false},
{name: "MaskedXorUint32x8", argLength: 3, commutative: true},
{name: "MaxUint32x8", argLength: 2, commutative: true},
{name: "MinUint32x8", argLength: 2, commutative: true},
{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
{name: "PopCountUint32x8", argLength: 1, commutative: false},
+ {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
{name: "SubUint32x8", argLength: 2, commutative: false},
+ {name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
{name: "XorUint32x8", argLength: 2, commutative: true},
{name: "AddUint64x2", argLength: 2, commutative: true},
{name: "AndUint64x2", argLength: 2, commutative: true},
OpAMD64VPMINSDMasked512
OpAMD64VPMULLDMasked512
OpAMD64VPORDMasked512
+ OpAMD64VPDPWSSDMasked512
OpAMD64VPOPCNTDMasked512
+ OpAMD64VPDPWSSDSMasked512
+ OpAMD64VPDPBUSDSMasked512
OpAMD64VPSUBDMasked512
+ OpAMD64VPDPBUSDMasked512
OpAMD64VPXORDMasked512
OpAMD64VPMAXSD512
OpAMD64VPMINSD512
OpAMD64VPMULLD512
OpAMD64VPORD512
+ OpAMD64VPDPWSSD512
OpAMD64VPOPCNTD512
+ OpAMD64VPDPWSSDS512
+ OpAMD64VPDPBUSDS512
OpAMD64VPSUBD512
+ OpAMD64VPDPBUSD512
OpAMD64VPXORD512
OpAMD64VPABSD128
OpAMD64VPADDD128
OpAMD64VPMINSDMasked128
OpAMD64VPMULLDMasked128
OpAMD64VPORDMasked128
+ OpAMD64VPDPWSSDMasked128
OpAMD64VPOPCNTDMasked128
+ OpAMD64VPDPWSSDSMasked128
+ OpAMD64VPDPBUSDSMasked128
OpAMD64VPSUBDMasked128
+ OpAMD64VPDPBUSDMasked128
OpAMD64VPXORDMasked128
OpAMD64VPMAXSD128
OpAMD64VPMINSD128
OpAMD64VPMULDQ128
OpAMD64VPMULLD128
+ OpAMD64VPDPWSSD128
OpAMD64VPHADDD128
OpAMD64VPHSUBD128
OpAMD64VPOPCNTD128
+ OpAMD64VPDPWSSDS128
+ OpAMD64VPDPBUSDS128
OpAMD64VPSIGND128
OpAMD64VPSUBD128
+ OpAMD64VPDPBUSD128
OpAMD64VPABSD256
OpAMD64VPADDD256
OpAMD64VPCMPEQD256
OpAMD64VPMINSDMasked256
OpAMD64VPMULLDMasked256
OpAMD64VPORDMasked256
+ OpAMD64VPDPWSSDMasked256
OpAMD64VPOPCNTDMasked256
+ OpAMD64VPDPWSSDSMasked256
+ OpAMD64VPDPBUSDSMasked256
OpAMD64VPSUBDMasked256
+ OpAMD64VPDPBUSDMasked256
OpAMD64VPXORDMasked256
OpAMD64VPMAXSD256
OpAMD64VPMINSD256
OpAMD64VPMULDQ256
OpAMD64VPMULLD256
+ OpAMD64VPDPWSSD256
OpAMD64VPHADDD256
OpAMD64VPHSUBD256
OpAMD64VPOPCNTD256
+ OpAMD64VPDPWSSDS256
+ OpAMD64VPDPBUSDS256
OpAMD64VPSIGND256
OpAMD64VPSUBD256
+ OpAMD64VPDPBUSD256
OpAMD64VPABSQ128
OpAMD64VPADDQ128
OpAMD64VPCMPEQQ128
OpMaskedMulLowInt32x16
OpMaskedNotEqualInt32x16
OpMaskedOrInt32x16
+ OpMaskedPairDotProdAccumulateInt32x16
OpMaskedPopCountInt32x16
+ OpMaskedSaturatedPairDotProdAccumulateInt32x16
+ OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16
OpMaskedSubInt32x16
+ OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16
OpMaskedXorInt32x16
OpMaxInt32x16
OpMinInt32x16
OpMulLowInt32x16
OpNotEqualInt32x16
OpOrInt32x16
+ OpPairDotProdAccumulateInt32x16
OpPopCountInt32x16
+ OpSaturatedPairDotProdAccumulateInt32x16
+ OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16
OpSubInt32x16
+ OpUnsignedSignedQuadDotProdAccumulateInt32x16
OpXorInt32x16
OpAbsoluteInt32x4
OpAddInt32x4
OpMaskedMulLowInt32x4
OpMaskedNotEqualInt32x4
OpMaskedOrInt32x4
+ OpMaskedPairDotProdAccumulateInt32x4
OpMaskedPopCountInt32x4
+ OpMaskedSaturatedPairDotProdAccumulateInt32x4
+ OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4
OpMaskedSubInt32x4
+ OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4
OpMaskedXorInt32x4
OpMaxInt32x4
OpMinInt32x4
OpMulLowInt32x4
OpNotEqualInt32x4
OpOrInt32x4
+ OpPairDotProdAccumulateInt32x4
OpPairwiseAddInt32x4
OpPairwiseSubInt32x4
OpPopCountInt32x4
+ OpSaturatedPairDotProdAccumulateInt32x4
+ OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4
OpSignInt32x4
OpSubInt32x4
+ OpUnsignedSignedQuadDotProdAccumulateInt32x4
OpXorInt32x4
OpAbsoluteInt32x8
OpAddInt32x8
OpMaskedMulLowInt32x8
OpMaskedNotEqualInt32x8
OpMaskedOrInt32x8
+ OpMaskedPairDotProdAccumulateInt32x8
OpMaskedPopCountInt32x8
+ OpMaskedSaturatedPairDotProdAccumulateInt32x8
+ OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8
OpMaskedSubInt32x8
+ OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8
OpMaskedXorInt32x8
OpMaxInt32x8
OpMinInt32x8
OpMulLowInt32x8
OpNotEqualInt32x8
OpOrInt32x8
+ OpPairDotProdAccumulateInt32x8
OpPairwiseAddInt32x8
OpPairwiseSubInt32x8
OpPopCountInt32x8
+ OpSaturatedPairDotProdAccumulateInt32x8
+ OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8
OpSignInt32x8
OpSubInt32x8
+ OpUnsignedSignedQuadDotProdAccumulateInt32x8
OpXorInt32x8
OpAbsoluteInt64x2
OpAddInt64x2
OpMaskedNotEqualUint32x16
OpMaskedOrUint32x16
OpMaskedPopCountUint32x16
+ OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16
OpMaskedSubUint32x16
+ OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16
OpMaskedXorUint32x16
OpMaxUint32x16
OpMinUint32x16
OpNotEqualUint32x16
OpOrUint32x16
OpPopCountUint32x16
+ OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16
OpSubUint32x16
+ OpUnsignedSignedQuadDotProdAccumulateUint32x16
OpXorUint32x16
OpAddUint32x4
OpAndUint32x4
OpMaskedNotEqualUint32x4
OpMaskedOrUint32x4
OpMaskedPopCountUint32x4
+ OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4
OpMaskedSubUint32x4
+ OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4
OpMaskedXorUint32x4
OpMaxUint32x4
OpMinUint32x4
OpPairwiseAddUint32x4
OpPairwiseSubUint32x4
OpPopCountUint32x4
+ OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4
OpSubUint32x4
+ OpUnsignedSignedQuadDotProdAccumulateUint32x4
OpXorUint32x4
OpAddUint32x8
OpAndUint32x8
OpMaskedNotEqualUint32x8
OpMaskedOrUint32x8
OpMaskedPopCountUint32x8
+ OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8
OpMaskedSubUint32x8
+ OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8
OpMaskedXorUint32x8
OpMaxUint32x8
OpMinUint32x8
OpPairwiseAddUint32x8
OpPairwiseSubUint32x8
OpPopCountUint32x8
+ OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8
OpSubUint32x8
+ OpUnsignedSignedQuadDotProdAccumulateUint32x8
OpXorUint32x8
OpAddUint64x2
OpAndUint64x2
},
},
},
+ {
+ name: "VPDPWSSDMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTDMasked512",
argLen: 2,
},
},
},
+ {
+ name: "VPDPWSSDSMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPDPBUSDSMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSUBDMasked512",
argLen: 3,
},
},
},
+ {
+ name: "VPDPBUSDMasked512",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPXORDMasked512",
argLen: 3,
},
},
},
+ {
+ name: "VPDPWSSD512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTD512",
argLen: 1,
},
},
},
+ {
+ name: "VPDPWSSDS512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPDPBUSDS512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSUBD512",
argLen: 2,
},
},
},
+ {
+ name: "VPDPBUSD512",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPXORD512",
argLen: 2,
},
},
},
+ {
+ name: "VPDPWSSDMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTDMasked128",
argLen: 2,
},
},
},
+ {
+ name: "VPDPWSSDSMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPDPBUSDSMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSUBDMasked128",
argLen: 3,
},
},
},
+ {
+ name: "VPDPBUSDMasked128",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPXORDMasked128",
argLen: 3,
},
},
},
+ {
+ name: "VPDPWSSD128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPHADDD128",
argLen: 2,
},
},
},
+ {
+ name: "VPDPWSSDS128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPDPBUSDS128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSIGND128",
argLen: 2,
},
},
},
+ {
+ name: "VPDPBUSD128",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPABSD256",
argLen: 1,
},
},
},
+ {
+ name: "VPDPWSSDMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTDMasked256",
argLen: 2,
},
},
},
+ {
+ name: "VPDPWSSDSMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPDPBUSDSMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSUBDMasked256",
argLen: 3,
},
},
},
+ {
+ name: "VPDPBUSDMasked256",
+ argLen: 4,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPXORDMasked256",
argLen: 3,
},
},
},
+ {
+ name: "VPDPWSSD256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPHADDD256",
argLen: 2,
},
},
},
+ {
+ name: "VPDPWSSDS256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPWSSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPDPBUSDS256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSDS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSIGND256",
argLen: 2,
},
},
},
+ {
+ name: "VPDPBUSD256",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPDPBUSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPABSQ128",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "MaskedPairDotProdAccumulateInt32x16",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedPopCountInt32x16",
argLen: 2,
generic: true,
},
+ {
+ name: "MaskedSaturatedPairDotProdAccumulateInt32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedSubInt32x16",
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x16",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedXorInt32x16",
argLen: 3,
commutative: true,
generic: true,
},
+ {
+ name: "PairDotProdAccumulateInt32x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PopCountInt32x16",
argLen: 1,
generic: true,
},
+ {
+ name: "SaturatedPairDotProdAccumulateInt32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "SubInt32x16",
argLen: 2,
generic: true,
},
+ {
+ name: "UnsignedSignedQuadDotProdAccumulateInt32x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "XorInt32x16",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "MaskedPairDotProdAccumulateInt32x4",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedPopCountInt32x4",
argLen: 2,
generic: true,
},
+ {
+ name: "MaskedSaturatedPairDotProdAccumulateInt32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedSubInt32x4",
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x4",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedXorInt32x4",
argLen: 3,
commutative: true,
generic: true,
},
+ {
+ name: "PairDotProdAccumulateInt32x4",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PairwiseAddInt32x4",
argLen: 2,
argLen: 1,
generic: true,
},
+ {
+ name: "SaturatedPairDotProdAccumulateInt32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4",
+ argLen: 3,
+ generic: true,
+ },
{
name: "SignInt32x4",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "UnsignedSignedQuadDotProdAccumulateInt32x4",
+ argLen: 3,
+ generic: true,
+ },
{
name: "XorInt32x4",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "MaskedPairDotProdAccumulateInt32x8",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedPopCountInt32x8",
argLen: 2,
generic: true,
},
+ {
+ name: "MaskedSaturatedPairDotProdAccumulateInt32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedSubInt32x8",
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x8",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedXorInt32x8",
argLen: 3,
commutative: true,
generic: true,
},
+ {
+ name: "PairDotProdAccumulateInt32x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "PairwiseAddInt32x8",
argLen: 2,
argLen: 1,
generic: true,
},
+ {
+ name: "SaturatedPairDotProdAccumulateInt32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "SignInt32x8",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "UnsignedSignedQuadDotProdAccumulateInt32x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "XorInt32x8",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedSubUint32x16",
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x16",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedXorUint32x16",
argLen: 3,
argLen: 1,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "SubUint32x16",
argLen: 2,
generic: true,
},
+ {
+ name: "UnsignedSignedQuadDotProdAccumulateUint32x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "XorUint32x16",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedSubUint32x4",
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x4",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedXorUint32x4",
argLen: 3,
argLen: 1,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4",
+ argLen: 3,
+ generic: true,
+ },
{
name: "SubUint32x4",
argLen: 2,
generic: true,
},
+ {
+ name: "UnsignedSignedQuadDotProdAccumulateUint32x4",
+ argLen: 3,
+ generic: true,
+ },
{
name: "XorUint32x4",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedSubUint32x8",
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x8",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MaskedXorUint32x8",
argLen: 3,
argLen: 1,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "SubUint32x8",
argLen: 2,
generic: true,
},
+ {
+ name: "UnsignedSignedQuadDotProdAccumulateUint32x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "XorUint32x8",
argLen: 2,
return rewriteValueAMD64_OpMaskedOrUint64x4(v)
case OpMaskedOrUint64x8:
return rewriteValueAMD64_OpMaskedOrUint64x8(v)
+ case OpMaskedPairDotProdAccumulateInt32x16:
+ return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x16(v)
+ case OpMaskedPairDotProdAccumulateInt32x4:
+ return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x4(v)
+ case OpMaskedPairDotProdAccumulateInt32x8:
+ return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x8(v)
case OpMaskedPairDotProdInt16x16:
return rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v)
case OpMaskedPairDotProdInt16x32:
return rewriteValueAMD64_OpMaskedSaturatedAddUint8x32(v)
case OpMaskedSaturatedAddUint8x64:
return rewriteValueAMD64_OpMaskedSaturatedAddUint8x64(v)
+ case OpMaskedSaturatedPairDotProdAccumulateInt32x16:
+ return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x16(v)
+ case OpMaskedSaturatedPairDotProdAccumulateInt32x4:
+ return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x4(v)
+ case OpMaskedSaturatedPairDotProdAccumulateInt32x8:
+ return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x8(v)
case OpMaskedSaturatedSubInt16x16:
return rewriteValueAMD64_OpMaskedSaturatedSubInt16x16(v)
case OpMaskedSaturatedSubInt16x32:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v)
case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8:
return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v)
+ case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v)
+ case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4(v)
+ case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8(v)
+ case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16(v)
+ case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v)
+ case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v)
case OpMaskedSqrtFloat32x16:
return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v)
case OpMaskedSqrtFloat32x4:
return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v)
case OpMaskedTruncWithPrecisionFloat64x8:
return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v)
+ case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16:
+ return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16(v)
+ case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4:
+ return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4(v)
+ case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8:
+ return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8(v)
+ case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16:
+ return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16(v)
+ case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4:
+ return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4(v)
+ case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8:
+ return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8(v)
case OpMaskedXorFloat32x16:
return rewriteValueAMD64_OpMaskedXorFloat32x16(v)
case OpMaskedXorFloat32x4:
case OpOrUint8x32:
v.Op = OpAMD64VPOR256
return true
+ case OpPairDotProdAccumulateInt32x16:
+ v.Op = OpAMD64VPDPWSSD512
+ return true
+ case OpPairDotProdAccumulateInt32x4:
+ v.Op = OpAMD64VPDPWSSD128
+ return true
+ case OpPairDotProdAccumulateInt32x8:
+ v.Op = OpAMD64VPDPWSSD256
+ return true
case OpPairDotProdInt16x16:
v.Op = OpAMD64VPMADDWD256
return true
case OpSaturatedAddUint8x64:
v.Op = OpAMD64VPADDSB512
return true
+ case OpSaturatedPairDotProdAccumulateInt32x16:
+ v.Op = OpAMD64VPDPWSSDS512
+ return true
+ case OpSaturatedPairDotProdAccumulateInt32x4:
+ v.Op = OpAMD64VPDPWSSDS128
+ return true
+ case OpSaturatedPairDotProdAccumulateInt32x8:
+ v.Op = OpAMD64VPDPWSSDS256
+ return true
case OpSaturatedPairwiseAddInt16x16:
v.Op = OpAMD64VPHADDSW256
return true
case OpSaturatedUnsignedSignedPairDotProdUint8x32:
v.Op = OpAMD64VPMADDUBSW256
return true
+ case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
+ v.Op = OpAMD64VPDPBUSDS512
+ return true
+ case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
+ v.Op = OpAMD64VPDPBUSDS128
+ return true
+ case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8:
+ v.Op = OpAMD64VPDPBUSDS256
+ return true
+ case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16:
+ v.Op = OpAMD64VPDPBUSDS512
+ return true
+ case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4:
+ v.Op = OpAMD64VPDPBUSDS128
+ return true
+ case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8:
+ v.Op = OpAMD64VPDPBUSDS256
+ return true
case OpSelect0:
return rewriteValueAMD64_OpSelect0(v)
case OpSelect1:
return rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v)
case OpTruncWithPrecisionFloat64x8:
return rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v)
+ case OpUnsignedSignedQuadDotProdAccumulateInt32x16:
+ v.Op = OpAMD64VPDPBUSD512
+ return true
+ case OpUnsignedSignedQuadDotProdAccumulateInt32x4:
+ v.Op = OpAMD64VPDPBUSD128
+ return true
+ case OpUnsignedSignedQuadDotProdAccumulateInt32x8:
+ v.Op = OpAMD64VPDPBUSD256
+ return true
+ case OpUnsignedSignedQuadDotProdAccumulateUint32x16:
+ v.Op = OpAMD64VPDPBUSD512
+ return true
+ case OpUnsignedSignedQuadDotProdAccumulateUint32x4:
+ v.Op = OpAMD64VPDPBUSD128
+ return true
+ case OpUnsignedSignedQuadDotProdAccumulateUint32x8:
+ v.Op = OpAMD64VPDPBUSD256
+ return true
case OpWB:
v.Op = OpAMD64LoweredWB
return true
return true
}
}
+func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x16(v *Value) bool { // rewrites v in place into VPDPWSSDMasked512; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedPairDotProdAccumulateInt32x16 x y z mask)
+ // result: (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDMasked512) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x4(v *Value) bool { // rewrites v in place into VPDPWSSDMasked128; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedPairDotProdAccumulateInt32x4 x y z mask)
+ // result: (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDMasked128) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x8(v *Value) bool { // rewrites v in place into VPDPWSSDMasked256; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedPairDotProdAccumulateInt32x8 x y z mask)
+ // result: (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDMasked256) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
func rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
+func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x16(v *Value) bool { // rewrites v in place into VPDPWSSDSMasked512; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask)
+ // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDSMasked512) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x4(v *Value) bool { // rewrites v in place into VPDPWSSDSMasked128; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedPairDotProdAccumulateInt32x4 x y z mask)
+ // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDSMasked128) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x8(v *Value) bool { // rewrites v in place into VPDPWSSDSMasked256; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedPairDotProdAccumulateInt32x8 x y z mask)
+ // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDSMasked256) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
func rewriteValueAMD64_OpMaskedSaturatedSubInt16x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool { // rewrites v in place into VPDPBUSDSMasked512; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask)
+ // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked512) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4(v *Value) bool { // rewrites v in place into VPDPBUSDSMasked128; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask)
+ // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked128) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8(v *Value) bool { // rewrites v in place into VPDPBUSDSMasked256; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask)
+ // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked256) // NOTE(review): reset presumably drops v's old args as well as switching its op - confirm against Value.reset
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16(v *Value) bool { // rewrites v in place into VPDPBUSDSMasked512; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask)
+ // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked512) // same target op as the Int32x16 rewrite; NOTE(review): reset presumably drops v's old args too - confirm
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v *Value) bool { // rewrites v in place into VPDPBUSDSMasked128; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask)
+ // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked128) // same target op as the Int32x4 rewrite; NOTE(review): reset presumably drops v's old args too - confirm
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v *Value) bool { // rewrites v in place into VPDPBUSDSMasked256; every path returns true
+ v_3 := v.Args[3] // fourth operand: the mask (bound to `mask` below)
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block // the mask-conversion value below is created in v's own block
+ // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask)
+ // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for { // not a real loop: the body is unconditional and returns on its first pass
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked256) // same target op as the Int32x8 rewrite; NOTE(review): reset presumably drops v's old args too - confirm
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) // conversion value typed types.TypeMask, positioned at v.Pos
+ v0.AddArg(mask) // the original mask is the conversion's only argument
+ v.AddArg4(x, y, z, v0) // x, y, z keep their order; the converted mask replaces the original mask as arg 3
+ return true
+ }
+}
func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return true
}
}
+func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask)
+ // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask)
+ // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask)
+ // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask)
+ // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask)
+ // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask)
+ // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
func rewriteValueAMD64_OpMaskedXorFloat32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
addF(simdPackage, "Int32x16.MaskedOr", opLen3(ssa.OpMaskedOrInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x16.MaskedSub", opLen3(ssa.OpMaskedSubInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x16.MaskedXor", opLen3(ssa.OpMaskedXorInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.MaskedOr", opLen3(ssa.OpMaskedOrInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.MaskedSub", opLen3(ssa.OpMaskedSubInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.MaskedXor", opLen3(ssa.OpMaskedXorInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x8.MaskedOr", opLen3(ssa.OpMaskedOrInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x8.MaskedSub", opLen3(ssa.OpMaskedSubInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x8.MaskedXor", opLen3(ssa.OpMaskedXorInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x2.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x16.MaskedOr", opLen3(ssa.OpMaskedOrUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x16.MaskedSub", opLen3(ssa.OpMaskedSubUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x16.MaskedXor", opLen3(ssa.OpMaskedXorUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.MaskedOr", opLen3(ssa.OpMaskedOrUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.MaskedSub", opLen3(ssa.OpMaskedSubUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.MaskedXor", opLen3(ssa.OpMaskedXorUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x8.MaskedOr", opLen3(ssa.OpMaskedOrUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x8.MaskedSub", opLen3(ssa.OpMaskedSubUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x8.MaskedXor", opLen3(ssa.OpMaskedXorUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x2.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x64.MaskedSub", opLen3(ssa.OpMaskedSubUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float32x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
func (x Float64x2) Div(y Float64x2) Float64x2
// DotProdBroadcast multiplies all elements and broadcasts the sum.
+// Const Immediate = 127.
//
// Asm: VDPPD, CPU Feature: AVX
func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512EVEX
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512EVEX
// Asm: VPSUBB, CPU Feature: AVX512EVEX
func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64
+// MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
+
+// MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
+
+// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
+
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
+
+// MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
+
+// MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
+
+// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
+
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
+
+// MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
+
+// MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
+
+// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
+
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
+
+// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
+
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
+
+// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
+
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
+
+// MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x with saturation.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
+
+// MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
+
// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
// Const Immediate = 10.
//