This CL is generated by CL 678515.
Change-Id: Iac7c424bbbffc2514dff3495d6c408fa9c998c2f
Reviewed-on: https://go-review.googlesource.com/c/go/+/681296
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
ssa.OpAMD64VORPD512,
ssa.OpAMD64VPORD512,
ssa.OpAMD64VPORQ512,
+ ssa.OpAMD64VPMADDWD256,
+ ssa.OpAMD64VPMADDWD128,
+ ssa.OpAMD64VPMADDWD512,
ssa.OpAMD64VHADDPS128,
ssa.OpAMD64VHADDPS256,
ssa.OpAMD64VHADDPD128,
ssa.OpAMD64VPSUBSB256,
ssa.OpAMD64VPSUBSW512,
ssa.OpAMD64VPSUBSB512,
+ ssa.OpAMD64VPMADDUBSW128,
+ ssa.OpAMD64VPMADDUBSW256,
+ ssa.OpAMD64VPMADDUBSW512,
ssa.OpAMD64VPSIGNW256,
ssa.OpAMD64VPSIGNW128,
ssa.OpAMD64VPSIGND128,
ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512,
+ ssa.OpAMD64VPMADDWDMasked256,
+ ssa.OpAMD64VPMADDWDMasked512,
+ ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPADDSWMasked256,
ssa.OpAMD64VPADDSWMasked512,
ssa.OpAMD64VPADDSWMasked128,
ssa.OpAMD64VPSUBSBMasked128,
ssa.OpAMD64VPSUBSBMasked256,
ssa.OpAMD64VPSUBSBMasked512,
+ ssa.OpAMD64VPMADDUBSWMasked256,
+ ssa.OpAMD64VPMADDUBSWMasked512,
+ ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPSUBWMasked256,
ssa.OpAMD64VPSUBWMasked512,
ssa.OpAMD64VPSUBWMasked128,
ssa.OpAMD64VREDUCEPDMasked512:
p = simdFp1k1fp1Imm8(s, v)
- case ssa.OpAMD64VCMPPS128,
+ case ssa.OpAMD64VDPPD128,
+ ssa.OpAMD64VCMPPS128,
ssa.OpAMD64VCMPPS256,
ssa.OpAMD64VCMPPD128,
ssa.OpAMD64VCMPPD256:
ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512,
+ ssa.OpAMD64VPMADDWDMasked256,
+ ssa.OpAMD64VPMADDWDMasked512,
+ ssa.OpAMD64VPMADDWDMasked128,
ssa.OpAMD64VPOPCNTWMasked256,
ssa.OpAMD64VPOPCNTWMasked512,
ssa.OpAMD64VPOPCNTWMasked128,
ssa.OpAMD64VPSUBSBMasked128,
ssa.OpAMD64VPSUBSBMasked256,
ssa.OpAMD64VPSUBSBMasked512,
+ ssa.OpAMD64VPMADDUBSWMasked256,
+ ssa.OpAMD64VPMADDUBSWMasked512,
+ ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VSQRTPSMasked512,
ssa.OpAMD64VSQRTPSMasked128,
ssa.OpAMD64VSQRTPSMasked256,
(DivFloat64x2 ...) => (VDIVPD128 ...)
(DivFloat64x4 ...) => (VDIVPD256 ...)
(DivFloat64x8 ...) => (VDIVPD512 ...)
+(DotProdBroadcastFloat64x2 x y) => (VDPPD128 [127] x y)
(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
(EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
(EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
(MaskedOrUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MaskedOrUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MaskedOrUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedPairDotProdInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(MaskedPairDotProdInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(MaskedPairDotProdInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
(MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
(MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(MaskedSaturatedSubUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(OrUint64x8 ...) => (VPORQ512 ...)
(OrUint8x16 ...) => (VPOR128 ...)
(OrUint8x32 ...) => (VPOR256 ...)
+(PairDotProdInt16x16 ...) => (VPMADDWD256 ...)
+(PairDotProdInt16x32 ...) => (VPMADDWD512 ...)
+(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
(PairwiseAddFloat32x4 ...) => (VHADDPS128 ...)
(PairwiseAddFloat32x8 ...) => (VHADDPS256 ...)
(PairwiseAddFloat64x2 ...) => (VHADDPD128 ...)
(SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
(SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
(SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
+(SaturatedUnsignedSignedPairDotProdUint16x16 ...) => (VPMADDUBSW256 ...)
+(SaturatedUnsignedSignedPairDotProdUint16x32 ...) => (VPMADDUBSW512 ...)
+(SaturatedUnsignedSignedPairDotProdUint16x8 ...) => (VPMADDUBSW128 ...)
+(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
+(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
(SignInt16x16 ...) => (VPSIGNW256 ...)
(SignInt16x8 ...) => (VPSIGNW128 ...)
(SignInt32x4 ...) => (VPSIGND128 ...)
{name: "VPMINSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPMADDWDMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPOPCNTWMasked256", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPADDSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPSUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMULHW256", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLW256", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPOR256", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPMADDWD256", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHADDW256", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHSUBW256", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPOPCNTW256", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMINSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPMADDWDMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPOPCNTWMasked512", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPADDSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPSUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMINSW512", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHW512", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLW512", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPMADDWD512", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPOPCNTW512", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPADDSW512", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPSUBSW512", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMINSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPMADDWDMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPOPCNTWMasked128", argLength: 2, reg: fp1k1fp1, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPADDSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPSUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMULHW128", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLW128", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPOR128", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPMADDWD128", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHADDW128", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHSUBW128", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPOPCNTW128", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPMADDUBSW512", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPAVGWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUBMasked128", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPMADDUBSW128", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPAVGB256", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPAVGBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUBMasked256", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPMADDUBSW256", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPAVGB512", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPAVGBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VROUNDPD128", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRNDSCALEPD128", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VREDUCEPD128", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VDPPD128", argLength: 2, reg: fp21, asm: "VDPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VCMPPD128", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VRNDSCALEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VREDUCEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false},
{name: "DivFloat64x2", argLength: 2, commutative: false},
+ {name: "DotProdBroadcastFloat64x2", argLength: 2, commutative: true},
{name: "EqualFloat64x2", argLength: 2, commutative: true},
{name: "FloorFloat64x2", argLength: 1, commutative: false},
{name: "GreaterFloat64x2", argLength: 2, commutative: false},
{name: "MaskedMulHighInt16x16", argLength: 3, commutative: true},
{name: "MaskedMulLowInt16x16", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt16x16", argLength: 3, commutative: true},
+ {name: "MaskedPairDotProdInt16x16", argLength: 3, commutative: false},
{name: "MaskedPopCountInt16x16", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddInt16x16", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubInt16x16", argLength: 3, commutative: false},
{name: "MulLowInt16x16", argLength: 2, commutative: true},
{name: "NotEqualInt16x16", argLength: 2, commutative: true},
{name: "OrInt16x16", argLength: 2, commutative: true},
+ {name: "PairDotProdInt16x16", argLength: 2, commutative: false},
{name: "PairwiseAddInt16x16", argLength: 2, commutative: false},
{name: "PairwiseSubInt16x16", argLength: 2, commutative: false},
{name: "PopCountInt16x16", argLength: 1, commutative: false},
{name: "MaskedMulHighInt16x32", argLength: 3, commutative: true},
{name: "MaskedMulLowInt16x32", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt16x32", argLength: 3, commutative: true},
+ {name: "MaskedPairDotProdInt16x32", argLength: 3, commutative: false},
{name: "MaskedPopCountInt16x32", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddInt16x32", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubInt16x32", argLength: 3, commutative: false},
{name: "MulHighInt16x32", argLength: 2, commutative: true},
{name: "MulLowInt16x32", argLength: 2, commutative: true},
{name: "NotEqualInt16x32", argLength: 2, commutative: true},
+ {name: "PairDotProdInt16x32", argLength: 2, commutative: false},
{name: "PopCountInt16x32", argLength: 1, commutative: false},
{name: "SaturatedAddInt16x32", argLength: 2, commutative: true},
{name: "SaturatedSubInt16x32", argLength: 2, commutative: false},
{name: "MaskedMulHighInt16x8", argLength: 3, commutative: true},
{name: "MaskedMulLowInt16x8", argLength: 3, commutative: true},
{name: "MaskedNotEqualInt16x8", argLength: 3, commutative: true},
+ {name: "MaskedPairDotProdInt16x8", argLength: 3, commutative: false},
{name: "MaskedPopCountInt16x8", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddInt16x8", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubInt16x8", argLength: 3, commutative: false},
{name: "MulLowInt16x8", argLength: 2, commutative: true},
{name: "NotEqualInt16x8", argLength: 2, commutative: true},
{name: "OrInt16x8", argLength: 2, commutative: true},
+ {name: "PairDotProdInt16x8", argLength: 2, commutative: false},
{name: "PairwiseAddInt16x8", argLength: 2, commutative: false},
{name: "PairwiseSubInt16x8", argLength: 2, commutative: false},
{name: "PopCountInt16x8", argLength: 1, commutative: false},
{name: "MaskedPopCountUint16x16", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16", argLength: 3, commutative: false},
{name: "MaskedSubUint16x16", argLength: 3, commutative: false},
{name: "MaxUint16x16", argLength: 2, commutative: true},
{name: "MinUint16x16", argLength: 2, commutative: true},
{name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
+ {name: "SaturatedUnsignedSignedPairDotProdUint16x16", argLength: 2, commutative: false},
{name: "SubUint16x16", argLength: 2, commutative: false},
{name: "XorUint16x16", argLength: 2, commutative: true},
{name: "AddUint16x32", argLength: 2, commutative: true},
{name: "MaskedPopCountUint16x32", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32", argLength: 3, commutative: false},
{name: "MaskedSubUint16x32", argLength: 3, commutative: false},
{name: "MaxUint16x32", argLength: 2, commutative: true},
{name: "MinUint16x32", argLength: 2, commutative: true},
{name: "PopCountUint16x32", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
+ {name: "SaturatedUnsignedSignedPairDotProdUint16x32", argLength: 2, commutative: false},
{name: "SubUint16x32", argLength: 2, commutative: false},
{name: "AddUint16x8", argLength: 2, commutative: true},
{name: "AndUint16x8", argLength: 2, commutative: true},
{name: "MaskedPopCountUint16x8", argLength: 2, commutative: false},
{name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true},
{name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false},
+ {name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8", argLength: 3, commutative: false},
{name: "MaskedSubUint16x8", argLength: 3, commutative: false},
{name: "MaxUint16x8", argLength: 2, commutative: true},
{name: "MinUint16x8", argLength: 2, commutative: true},
{name: "PopCountUint16x8", argLength: 1, commutative: false},
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
{name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
+ {name: "SaturatedUnsignedSignedPairDotProdUint16x8", argLength: 2, commutative: false},
{name: "SubUint16x8", argLength: 2, commutative: false},
{name: "XorUint16x8", argLength: 2, commutative: true},
{name: "AddUint32x16", argLength: 2, commutative: true},
{name: "PopCountUint8x16", argLength: 1, commutative: false},
{name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
{name: "SaturatedSubUint8x16", argLength: 2, commutative: false},
+ {name: "SaturatedUnsignedSignedPairDotProdUint8x16", argLength: 2, commutative: false},
{name: "SubUint8x16", argLength: 2, commutative: false},
{name: "XorUint8x16", argLength: 2, commutative: true},
{name: "AddUint8x32", argLength: 2, commutative: true},
{name: "PopCountUint8x32", argLength: 1, commutative: false},
{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
{name: "SaturatedSubUint8x32", argLength: 2, commutative: false},
+ {name: "SaturatedUnsignedSignedPairDotProdUint8x32", argLength: 2, commutative: false},
{name: "SubUint8x32", argLength: 2, commutative: false},
{name: "XorUint8x32", argLength: 2, commutative: true},
{name: "AddUint8x64", argLength: 2, commutative: true},
OpAMD64VPMINSWMasked256
OpAMD64VPMULHWMasked256
OpAMD64VPMULLWMasked256
+ OpAMD64VPMADDWDMasked256
OpAMD64VPOPCNTWMasked256
OpAMD64VPADDSWMasked256
OpAMD64VPSUBSWMasked256
OpAMD64VPMULHW256
OpAMD64VPMULLW256
OpAMD64VPOR256
+ OpAMD64VPMADDWD256
OpAMD64VPHADDW256
OpAMD64VPHSUBW256
OpAMD64VPOPCNTW256
OpAMD64VPMINSWMasked512
OpAMD64VPMULHWMasked512
OpAMD64VPMULLWMasked512
+ OpAMD64VPMADDWDMasked512
OpAMD64VPOPCNTWMasked512
OpAMD64VPADDSWMasked512
OpAMD64VPSUBSWMasked512
OpAMD64VPMINSW512
OpAMD64VPMULHW512
OpAMD64VPMULLW512
+ OpAMD64VPMADDWD512
OpAMD64VPOPCNTW512
OpAMD64VPADDSW512
OpAMD64VPSUBSW512
OpAMD64VPMINSWMasked128
OpAMD64VPMULHWMasked128
OpAMD64VPMULLWMasked128
+ OpAMD64VPMADDWDMasked128
OpAMD64VPOPCNTWMasked128
OpAMD64VPADDSWMasked128
OpAMD64VPSUBSWMasked128
OpAMD64VPMULHW128
OpAMD64VPMULLW128
OpAMD64VPOR128
+ OpAMD64VPMADDWD128
OpAMD64VPHADDW128
OpAMD64VPHSUBW128
OpAMD64VPOPCNTW128
OpAMD64VPMAXUWMasked256
OpAMD64VPMINUWMasked256
OpAMD64VPMULHUWMasked256
+ OpAMD64VPMADDUBSWMasked256
OpAMD64VPMAXUW256
OpAMD64VPMINUW256
OpAMD64VPMULHUW256
OpAMD64VPMAXUWMasked512
OpAMD64VPMINUWMasked512
OpAMD64VPMULHUWMasked512
+ OpAMD64VPMADDUBSWMasked512
OpAMD64VPMAXUW512
OpAMD64VPMINUW512
OpAMD64VPMULHUW512
+ OpAMD64VPMADDUBSW512
OpAMD64VPAVGW128
OpAMD64VPAVGWMasked128
OpAMD64VPMAXUWMasked128
OpAMD64VPMINUWMasked128
OpAMD64VPMULHUWMasked128
+ OpAMD64VPMADDUBSWMasked128
OpAMD64VPMAXUW128
OpAMD64VPMINUW128
OpAMD64VPMULHUW128
OpAMD64VPMINUBMasked128
OpAMD64VPMAXUB128
OpAMD64VPMINUB128
+ OpAMD64VPMADDUBSW128
OpAMD64VPAVGB256
OpAMD64VPAVGBMasked256
OpAMD64VPMAXUBMasked256
OpAMD64VPMINUBMasked256
OpAMD64VPMAXUB256
OpAMD64VPMINUB256
+ OpAMD64VPMADDUBSW256
OpAMD64VPAVGB512
OpAMD64VPAVGBMasked512
OpAMD64VPMAXUBMasked512
OpAMD64VROUNDPD128
OpAMD64VRNDSCALEPD128
OpAMD64VREDUCEPD128
+ OpAMD64VDPPD128
OpAMD64VCMPPD128
OpAMD64VRNDSCALEPDMasked128
OpAMD64VREDUCEPDMasked128
OpApproximateReciprocalOfSqrtFloat64x2
OpCeilFloat64x2
OpDivFloat64x2
+ OpDotProdBroadcastFloat64x2
OpEqualFloat64x2
OpFloorFloat64x2
OpGreaterFloat64x2
OpMaskedMulHighInt16x16
OpMaskedMulLowInt16x16
OpMaskedNotEqualInt16x16
+ OpMaskedPairDotProdInt16x16
OpMaskedPopCountInt16x16
OpMaskedSaturatedAddInt16x16
OpMaskedSaturatedSubInt16x16
OpMulLowInt16x16
OpNotEqualInt16x16
OpOrInt16x16
+ OpPairDotProdInt16x16
OpPairwiseAddInt16x16
OpPairwiseSubInt16x16
OpPopCountInt16x16
OpMaskedMulHighInt16x32
OpMaskedMulLowInt16x32
OpMaskedNotEqualInt16x32
+ OpMaskedPairDotProdInt16x32
OpMaskedPopCountInt16x32
OpMaskedSaturatedAddInt16x32
OpMaskedSaturatedSubInt16x32
OpMulHighInt16x32
OpMulLowInt16x32
OpNotEqualInt16x32
+ OpPairDotProdInt16x32
OpPopCountInt16x32
OpSaturatedAddInt16x32
OpSaturatedSubInt16x32
OpMaskedMulHighInt16x8
OpMaskedMulLowInt16x8
OpMaskedNotEqualInt16x8
+ OpMaskedPairDotProdInt16x8
OpMaskedPopCountInt16x8
OpMaskedSaturatedAddInt16x8
OpMaskedSaturatedSubInt16x8
OpMulLowInt16x8
OpNotEqualInt16x8
OpOrInt16x8
+ OpPairDotProdInt16x8
OpPairwiseAddInt16x8
OpPairwiseSubInt16x8
OpPopCountInt16x8
OpMaskedPopCountUint16x16
OpMaskedSaturatedAddUint16x16
OpMaskedSaturatedSubUint16x16
+ OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16
OpMaskedSubUint16x16
OpMaxUint16x16
OpMinUint16x16
OpPopCountUint16x16
OpSaturatedAddUint16x16
OpSaturatedSubUint16x16
+ OpSaturatedUnsignedSignedPairDotProdUint16x16
OpSubUint16x16
OpXorUint16x16
OpAddUint16x32
OpMaskedPopCountUint16x32
OpMaskedSaturatedAddUint16x32
OpMaskedSaturatedSubUint16x32
+ OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32
OpMaskedSubUint16x32
OpMaxUint16x32
OpMinUint16x32
OpPopCountUint16x32
OpSaturatedAddUint16x32
OpSaturatedSubUint16x32
+ OpSaturatedUnsignedSignedPairDotProdUint16x32
OpSubUint16x32
OpAddUint16x8
OpAndUint16x8
OpMaskedPopCountUint16x8
OpMaskedSaturatedAddUint16x8
OpMaskedSaturatedSubUint16x8
+ OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8
OpMaskedSubUint16x8
OpMaxUint16x8
OpMinUint16x8
OpPopCountUint16x8
OpSaturatedAddUint16x8
OpSaturatedSubUint16x8
+ OpSaturatedUnsignedSignedPairDotProdUint16x8
OpSubUint16x8
OpXorUint16x8
OpAddUint32x16
OpPopCountUint8x16
OpSaturatedAddUint8x16
OpSaturatedSubUint8x16
+ OpSaturatedUnsignedSignedPairDotProdUint8x16
OpSubUint8x16
OpXorUint8x16
OpAddUint8x32
OpPopCountUint8x32
OpSaturatedAddUint8x32
OpSaturatedSubUint8x32
+ OpSaturatedUnsignedSignedPairDotProdUint8x32
OpSubUint8x32
OpXorUint8x32
OpAddUint8x64
},
},
},
+ {
+ name: "VPMADDWDMasked256",
+ argLen: 3,
+ asm: x86.AVPMADDWD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTWMasked256",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDWD256",
+ argLen: 2,
+ asm: x86.AVPMADDWD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPHADDW256",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDWDMasked512",
+ argLen: 3,
+ asm: x86.AVPMADDWD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTWMasked512",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDWD512",
+ argLen: 2,
+ asm: x86.AVPMADDWD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTW512",
argLen: 1,
},
},
},
+ {
+ name: "VPMADDWDMasked128",
+ argLen: 3,
+ asm: x86.AVPMADDWD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPOPCNTWMasked128",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDWD128",
+ argLen: 2,
+ asm: x86.AVPMADDWD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPHADDW128",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDUBSWMasked256",
+ argLen: 3,
+ asm: x86.AVPMADDUBSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPMAXUW256",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDUBSWMasked512",
+ argLen: 3,
+ asm: x86.AVPMADDUBSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPMAXUW512",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDUBSW512",
+ argLen: 2,
+ asm: x86.AVPMADDUBSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPAVGW128",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDUBSWMasked128",
+ argLen: 3,
+ asm: x86.AVPMADDUBSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPMAXUW128",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDUBSW128",
+ argLen: 2,
+ asm: x86.AVPMADDUBSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPAVGB256",
argLen: 2,
},
},
},
+ {
+ name: "VPMADDUBSW256",
+ argLen: 2,
+ asm: x86.AVPMADDUBSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPAVGB512",
argLen: 2,
},
},
},
+ {
+ name: "VDPPD128",
+ auxType: auxInt8,
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVDPPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VCMPPD128",
auxType: auxInt8,
argLen: 2,
generic: true,
},
+ {
+ name: "DotProdBroadcastFloat64x2",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
{
name: "EqualFloat64x2",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "MaskedPairDotProdInt16x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "MaskedPopCountInt16x16",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "PairDotProdInt16x16",
+ argLen: 2,
+ generic: true,
+ },
{
name: "PairwiseAddInt16x16",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "MaskedPairDotProdInt16x32",
+ argLen: 3,
+ generic: true,
+ },
{
name: "MaskedPopCountInt16x32",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "PairDotProdInt16x32",
+ argLen: 2,
+ generic: true,
+ },
{
name: "PopCountInt16x32",
argLen: 1,
commutative: true,
generic: true,
},
+ {
+ name: "MaskedPairDotProdInt16x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "MaskedPopCountInt16x8",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "PairDotProdInt16x8",
+ argLen: 2,
+ generic: true,
+ },
{
name: "PairwiseAddInt16x8",
argLen: 2,
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x16",
+ argLen: 3,
+ generic: true,
+ },
{
name: "MaskedSubUint16x16",
argLen: 3,
argLen: 2,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedPairDotProdUint16x16",
+ argLen: 2,
+ generic: true,
+ },
{
name: "SubUint16x16",
argLen: 2,
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x32",
+ argLen: 3,
+ generic: true,
+ },
{
name: "MaskedSubUint16x32",
argLen: 3,
argLen: 2,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedPairDotProdUint16x32",
+ argLen: 2,
+ generic: true,
+ },
{
name: "SubUint16x32",
argLen: 2,
argLen: 3,
generic: true,
},
+ {
+ name: "MaskedSaturatedUnsignedSignedPairDotProdUint16x8",
+ argLen: 3,
+ generic: true,
+ },
{
name: "MaskedSubUint16x8",
argLen: 3,
argLen: 2,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedPairDotProdUint16x8",
+ argLen: 2,
+ generic: true,
+ },
{
name: "SubUint16x8",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedPairDotProdUint8x16",
+ argLen: 2,
+ generic: true,
+ },
{
name: "SubUint8x16",
argLen: 2,
argLen: 2,
generic: true,
},
+ {
+ name: "SaturatedUnsignedSignedPairDotProdUint8x32",
+ argLen: 2,
+ generic: true,
+ },
{
name: "SubUint8x32",
argLen: 2,
case OpDivFloat64x8:
v.Op = OpAMD64VDIVPD512
return true
+ case OpDotProdBroadcastFloat64x2:
+ return rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v)
case OpEq16:
return rewriteValueAMD64_OpEq16(v)
case OpEq32:
return rewriteValueAMD64_OpMaskedOrUint64x4(v)
case OpMaskedOrUint64x8:
return rewriteValueAMD64_OpMaskedOrUint64x8(v)
+ case OpMaskedPairDotProdInt16x16:
+ return rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v)
+ case OpMaskedPairDotProdInt16x32:
+ return rewriteValueAMD64_OpMaskedPairDotProdInt16x32(v)
+ case OpMaskedPairDotProdInt16x8:
+ return rewriteValueAMD64_OpMaskedPairDotProdInt16x8(v)
case OpMaskedPopCountInt16x16:
return rewriteValueAMD64_OpMaskedPopCountInt16x16(v)
case OpMaskedPopCountInt16x32:
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v)
case OpMaskedSaturatedSubUint8x64:
return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v)
+ case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v)
+ case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v)
+ case OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8:
+ return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v)
case OpMaskedSqrtFloat32x16:
return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v)
case OpMaskedSqrtFloat32x4:
case OpOrUint8x32:
v.Op = OpAMD64VPOR256
return true
+ case OpPairDotProdInt16x16:
+ v.Op = OpAMD64VPMADDWD256
+ return true
+ case OpPairDotProdInt16x32:
+ v.Op = OpAMD64VPMADDWD512
+ return true
+ case OpPairDotProdInt16x8:
+ v.Op = OpAMD64VPMADDWD128
+ return true
case OpPairwiseAddFloat32x4:
v.Op = OpAMD64VHADDPS128
return true
case OpSaturatedSubUint8x64:
v.Op = OpAMD64VPSUBSB512
return true
+ case OpSaturatedUnsignedSignedPairDotProdUint16x16:
+ v.Op = OpAMD64VPMADDUBSW256
+ return true
+ case OpSaturatedUnsignedSignedPairDotProdUint16x32:
+ v.Op = OpAMD64VPMADDUBSW512
+ return true
+ case OpSaturatedUnsignedSignedPairDotProdUint16x8:
+ v.Op = OpAMD64VPMADDUBSW128
+ return true
+ case OpSaturatedUnsignedSignedPairDotProdUint8x16:
+ v.Op = OpAMD64VPMADDUBSW128
+ return true
+ case OpSaturatedUnsignedSignedPairDotProdUint8x32:
+ v.Op = OpAMD64VPMADDUBSW256
+ return true
case OpSelect0:
return rewriteValueAMD64_OpSelect0(v)
case OpSelect1:
return true
}
}
+// rewriteValueAMD64_OpDotProdBroadcastFloat64x2 lowers the generic
+// DotProdBroadcastFloat64x2 op to VDPPD128 with an immediate of 127.
+// It rewrites v unconditionally and reports true.
+func rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v *Value) bool {
+	// match: (DotProdBroadcastFloat64x2 x y)
+	// result: (VDPPD128 [127] x y)
+	// Operands are captured up front (highest index first), before v is reset.
+	rhs := v.Args[1]
+	lhs := v.Args[0]
+	v.reset(OpAMD64VDPPD128)
+	v.AuxInt = int8ToAuxInt(127)
+	v.AddArg2(lhs, rhs)
+	return true
+}
func rewriteValueAMD64_OpEq16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return true
}
}
+// rewriteValueAMD64_OpMaskedPairDotProdInt16x16 lowers the generic
+// MaskedPairDotProdInt16x16 op to VPMADDWDMasked256, wrapping the mask
+// operand in a VPMOVVec16x16ToM value of type types.TypeMask.
+// It rewrites v unconditionally and reports true.
+func rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v *Value) bool {
+	// match: (MaskedPairDotProdInt16x16 x y mask)
+	// result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+	// Operands are captured up front (highest index first), before v is reset.
+	maskVec := v.Args[2]
+	rhs := v.Args[1]
+	lhs := v.Args[0]
+	blk := v.Block
+	v.reset(OpAMD64VPMADDWDMasked256)
+	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+	kmask.AddArg(maskVec)
+	v.AddArg3(lhs, rhs, kmask)
+	return true
+}
+// rewriteValueAMD64_OpMaskedPairDotProdInt16x32 lowers the generic
+// MaskedPairDotProdInt16x32 op to VPMADDWDMasked512, wrapping the mask
+// operand in a VPMOVVec16x32ToM value of type types.TypeMask.
+// It rewrites v unconditionally and reports true.
+func rewriteValueAMD64_OpMaskedPairDotProdInt16x32(v *Value) bool {
+	// match: (MaskedPairDotProdInt16x32 x y mask)
+	// result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+	// Operands are captured up front (highest index first), before v is reset.
+	maskVec := v.Args[2]
+	rhs := v.Args[1]
+	lhs := v.Args[0]
+	blk := v.Block
+	v.reset(OpAMD64VPMADDWDMasked512)
+	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+	kmask.AddArg(maskVec)
+	v.AddArg3(lhs, rhs, kmask)
+	return true
+}
+// rewriteValueAMD64_OpMaskedPairDotProdInt16x8 lowers the generic
+// MaskedPairDotProdInt16x8 op to VPMADDWDMasked128, wrapping the mask
+// operand in a VPMOVVec16x8ToM value of type types.TypeMask.
+// It rewrites v unconditionally and reports true.
+func rewriteValueAMD64_OpMaskedPairDotProdInt16x8(v *Value) bool {
+	// match: (MaskedPairDotProdInt16x8 x y mask)
+	// result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+	// Operands are captured up front (highest index first), before v is reset.
+	maskVec := v.Args[2]
+	rhs := v.Args[1]
+	lhs := v.Args[0]
+	blk := v.Block
+	v.reset(OpAMD64VPMADDWDMasked128)
+	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+	kmask.AddArg(maskVec)
+	v.AddArg3(lhs, rhs, kmask)
+	return true
+}
func rewriteValueAMD64_OpMaskedPopCountInt16x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return true
}
}
+// rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16 lowers
+// the generic MaskedSaturatedUnsignedSignedPairDotProdUint16x16 op to
+// VPMADDUBSWMasked256, wrapping the mask operand in a VPMOVVec16x16ToM value
+// of type types.TypeMask. It rewrites v unconditionally and reports true.
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16(v *Value) bool {
+	// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x16 x y mask)
+	// result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+	// Operands are captured up front (highest index first), before v is reset.
+	maskVec := v.Args[2]
+	rhs := v.Args[1]
+	lhs := v.Args[0]
+	blk := v.Block
+	v.reset(OpAMD64VPMADDUBSWMasked256)
+	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+	kmask.AddArg(maskVec)
+	v.AddArg3(lhs, rhs, kmask)
+	return true
+}
+// rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32 lowers
+// the generic MaskedSaturatedUnsignedSignedPairDotProdUint16x32 op to
+// VPMADDUBSWMasked512, wrapping the mask operand in a VPMOVVec16x32ToM value
+// of type types.TypeMask. It rewrites v unconditionally and reports true.
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32(v *Value) bool {
+	// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x32 x y mask)
+	// result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+	// Operands are captured up front (highest index first), before v is reset.
+	maskVec := v.Args[2]
+	rhs := v.Args[1]
+	lhs := v.Args[0]
+	blk := v.Block
+	v.reset(OpAMD64VPMADDUBSWMasked512)
+	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+	kmask.AddArg(maskVec)
+	v.AddArg3(lhs, rhs, kmask)
+	return true
+}
+// rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8 lowers
+// the generic MaskedSaturatedUnsignedSignedPairDotProdUint16x8 op to
+// VPMADDUBSWMasked128, wrapping the mask operand in a VPMOVVec16x8ToM value
+// of type types.TypeMask. It rewrites v unconditionally and reports true.
+func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8(v *Value) bool {
+	// match: (MaskedSaturatedUnsignedSignedPairDotProdUint16x8 x y mask)
+	// result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+	// Operands are captured up front (highest index first), before v is reset.
+	maskVec := v.Args[2]
+	rhs := v.Args[1]
+	lhs := v.Args[0]
+	blk := v.Block
+	v.reset(OpAMD64VPMADDUBSWMasked128)
+	kmask := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+	kmask.AddArg(maskVec)
+	v.AddArg3(lhs, rhs, kmask)
+	return true
+}
func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
addF(simdPackage, "Float64x2.And", opLen2(ssa.OpAndFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.AndNot", opLen2(ssa.OpAndNotFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.Or", opLen2(ssa.OpOrInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.Sub", opLen2(ssa.OpSubInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.Or", opLen2(ssa.OpOrInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.Sub", opLen2(ssa.OpSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.Xor", opLen2(ssa.OpXorUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Add", opLen2(ssa.OpAddUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.Sub", opLen2(ssa.OpSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Add", opLen2(ssa.OpAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.And", opLen2(ssa.OpAndUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Sub", opLen2(ssa.OpSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Xor", opLen2(ssa.OpXorUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x16.Add", opLen2(ssa.OpAddUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Or", opLen2(ssa.OpOrUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Sub", opLen2(ssa.OpSubUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Xor", opLen2(ssa.OpXorUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Add", opLen2(ssa.OpAddUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.Or", opLen2(ssa.OpOrUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.Sub", opLen2(ssa.OpSubUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x32.Xor", opLen2(ssa.OpXorUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Add", opLen2(ssa.OpAddUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x16.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x16.MaskedSub", opLen3(ssa.OpMaskedSubInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x32.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.MaskedSub", opLen3(ssa.OpMaskedSubInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.MaskedSub", opLen3(ssa.OpMaskedSubInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x16.MaskedSub", opLen3(ssa.OpMaskedSubUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x32.MaskedSub", opLen3(ssa.OpMaskedSubUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MaskedSub", opLen3(ssa.OpMaskedSubUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x16.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x16, types.TypeVec512), sys.AMD64)
// Asm: VDIVPD, CPU Feature: AVX
func (x Float64x2) Div(y Float64x2) Float64x2
+// DotProdBroadcast multiplies all the elements of x and y and adds the products together; the result is a broadcast of the dot product.
+//
+// Asm: VDPPD, CPU Feature: AVX
+func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
+
// Predicate immediate is 0 if it has;
//
// Asm: VCMPPD, CPU Feature: AVX
// Asm: VPOR, CPU Feature: AVX2
func (x Int16x16) Or(y Int16x16) Int16x16
+// PairDotProd multiplies the elements and adds the pairs together, yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX2
+func (x Int16x16) PairDotProd(y Int16x16) Int32x8
+
// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target
//
// Asm: VPHADDW, CPU Feature: AVX2
// Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x32) NotEqual(y Int16x32) Mask16x32
+// PairDotProd multiplies the elements and adds the pairs together, yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x32) PairDotProd(y Int16x32) Int32x16
+
// Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32
// Asm: VPOR, CPU Feature: AVX
func (x Int16x8) Or(y Int16x8) Int16x8
+// PairDotProd multiplies the elements and adds the pairs together, yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX
+func (x Int16x8) PairDotProd(y Int16x8) Int32x4
+
// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target
//
// Asm: VPHADDW, CPU Feature: AVX
// Asm: VPSUBSW, CPU Feature: AVX2
func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): the declared result type (Int16x16) has the same element count and element size as the receiver (Uint16x16), which does not match the description above; confirm whether this 16-bit variant is intended.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) SaturatedUnsignedSignedPairDotProd(y Int16x16) Int16x16
+
// Asm: VPSUBW, CPU Feature: AVX2
func (x Uint16x16) Sub(y Uint16x16) Uint16x16
// Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): the declared result type (Int16x32) has the same element count and element size as the receiver (Uint16x32), which does not match the description above; confirm whether this 16-bit variant is intended.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) SaturatedUnsignedSignedPairDotProd(y Int16x32) Int16x32
+
// Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x32) Sub(y Uint16x32) Uint16x32
// Asm: VPSUBSW, CPU Feature: AVX
func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8
+// SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): the declared result type (Int16x8) has the same element count and element size as the receiver (Uint16x8), which does not match the description above; confirm whether this 16-bit variant is intended.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) SaturatedUnsignedSignedPairDotProd(y Int16x8) Int16x8
+
// Asm: VPSUBW, CPU Feature: AVX
func (x Uint16x8) Sub(y Uint16x8) Uint16x8
// Asm: VPSUBSB, CPU Feature: AVX
func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16
+// SaturatedUnsignedSignedPairDotProd multiplies the unsigned elements of x by the signed elements of y and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX
+func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8
+
// Asm: VPSUBB, CPU Feature: AVX
func (x Uint8x16) Sub(y Uint8x16) Uint8x16
// Asm: VPSUBSB, CPU Feature: AVX2
func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32
+// SaturatedUnsignedSignedPairDotProd multiplies the unsigned elements of x by the signed elements of y and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX2
+func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
+
// Asm: VPSUBB, CPU Feature: AVX2
func (x Uint8x32) Sub(y Uint8x32) Uint8x32
// Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16
+// MaskedPairDotProd multiplies the elements and adds the pairs together, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): z is presumably a write mask; its effect on unselected elements is not described here. The mask type (Mask16x16) has 16-bit lanes while the result (Int32x8) has 32-bit elements — confirm which element width the mask applies to.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x16) MaskedPairDotProd(y Int16x16, z Mask16x16) Int32x8
+
// Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16
// Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32
+// MaskedPairDotProd multiplies the elements and adds the pairs together, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): z is presumably a write mask; its effect on unselected elements is not described here. The mask type (Mask16x32) has 16-bit lanes while the result (Int32x16) has 32-bit elements — confirm which element width the mask applies to.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16
+
// Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32
// Asm: VPCMPW, CPU Feature: AVX512EVEX
func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8
+// MaskedPairDotProd multiplies the elements and adds the pairs together, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): z is presumably a write mask; its effect on unselected elements is not described here. The mask type (Mask16x8) has 16-bit lanes while the result (Int32x4) has 32-bit elements — confirm which element width the mask applies to.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512EVEX
+func (x Int16x8) MaskedPairDotProd(y Int16x8, z Mask16x8) Int32x4
+
// Asm: VPADDSW, CPU Feature: AVX512EVEX
func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8
// Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16
+// MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): the declared result type (Int16x16) has the same element count and element size as the receiver (Uint16x16), which does not match the description above; confirm whether this 16-bit variant is intended. z is presumably a write mask; its effect on unselected elements is not described here.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x16, z Mask16x16) Int16x16
+
// Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16
// Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32
+// MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): the declared result type (Int16x32) has the same element count and element size as the receiver (Uint16x32), which does not match the description above; confirm whether this 16-bit variant is intended. z is presumably a write mask; its effect on unselected elements is not described here.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x32, z Mask16x32) Int16x32
+
// Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32
// Asm: VPSUBSW, CPU Feature: AVX512EVEX
func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8
+// MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation, yielding a vector of half as many elements with twice the input element size.
+//
+// NOTE(review): the declared result type (Int16x8) has the same element count and element size as the receiver (Uint16x8), which does not match the description above; confirm whether this 16-bit variant is intended. z is presumably a write mask; its effect on unselected elements is not described here.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX
+func (x Uint16x8) MaskedSaturatedUnsignedSignedPairDotProd(y Int16x8, z Mask16x8) Int16x8
+
// Asm: VPSUBW, CPU Feature: AVX512EVEX
func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8