ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512,
+ ssa.OpAMD64VBROADCASTSS128,
+ ssa.OpAMD64VPBROADCASTQ128,
+ ssa.OpAMD64VPBROADCASTB128,
+ ssa.OpAMD64VPBROADCASTW128,
+ ssa.OpAMD64VPBROADCASTD128,
+ ssa.OpAMD64VBROADCASTSS256,
+ ssa.OpAMD64VBROADCASTSD256,
+ ssa.OpAMD64VPBROADCASTB256,
+ ssa.OpAMD64VPBROADCASTW256,
+ ssa.OpAMD64VPBROADCASTD256,
+ ssa.OpAMD64VPBROADCASTQ256,
+ ssa.OpAMD64VBROADCASTSS512,
+ ssa.OpAMD64VBROADCASTSD512,
+ ssa.OpAMD64VPBROADCASTB512,
+ ssa.OpAMD64VPBROADCASTW512,
+ ssa.OpAMD64VPBROADCASTD512,
+ ssa.OpAMD64VPBROADCASTQ512,
ssa.OpAMD64VCVTTPS2DQ128,
ssa.OpAMD64VCVTTPS2DQ256,
ssa.OpAMD64VCVTTPS2DQ512,
ssa.OpAMD64VPABSQMasked128,
ssa.OpAMD64VPABSQMasked256,
ssa.OpAMD64VPABSQMasked512,
+ ssa.OpAMD64VBROADCASTSSMasked128,
+ ssa.OpAMD64VPBROADCASTQMasked128,
+ ssa.OpAMD64VPBROADCASTBMasked128,
+ ssa.OpAMD64VPBROADCASTWMasked128,
+ ssa.OpAMD64VPBROADCASTDMasked128,
+ ssa.OpAMD64VBROADCASTSSMasked256,
+ ssa.OpAMD64VBROADCASTSDMasked256,
+ ssa.OpAMD64VPBROADCASTBMasked256,
+ ssa.OpAMD64VPBROADCASTWMasked256,
+ ssa.OpAMD64VPBROADCASTDMasked256,
+ ssa.OpAMD64VPBROADCASTQMasked256,
+ ssa.OpAMD64VBROADCASTSSMasked512,
+ ssa.OpAMD64VBROADCASTSDMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked512,
+ ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTDMasked512,
+ ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
ssa.OpAMD64VPSRLQMasked512:
p = simdVfpkv(s, v)
- case ssa.OpAMD64VPINSRB128,
- ssa.OpAMD64VPINSRW128,
- ssa.OpAMD64VPINSRD128,
- ssa.OpAMD64VPINSRQ128:
+ case ssa.OpAMD64VPINSRD128,
+ ssa.OpAMD64VPINSRQ128,
+ ssa.OpAMD64VPINSRB128,
+ ssa.OpAMD64VPINSRW128:
p = simdVgpvImm8(s, v)
case ssa.OpAMD64VPEXTRB128,
ssa.OpAMD64VPAVGWMasked128,
ssa.OpAMD64VPAVGWMasked256,
ssa.OpAMD64VPAVGWMasked512,
+ ssa.OpAMD64VBROADCASTSSMasked128,
+ ssa.OpAMD64VPBROADCASTQMasked128,
+ ssa.OpAMD64VPBROADCASTBMasked128,
+ ssa.OpAMD64VPBROADCASTWMasked128,
+ ssa.OpAMD64VPBROADCASTDMasked128,
+ ssa.OpAMD64VBROADCASTSSMasked256,
+ ssa.OpAMD64VBROADCASTSDMasked256,
+ ssa.OpAMD64VPBROADCASTBMasked256,
+ ssa.OpAMD64VPBROADCASTWMasked256,
+ ssa.OpAMD64VPBROADCASTDMasked256,
+ ssa.OpAMD64VPBROADCASTQMasked256,
+ ssa.OpAMD64VBROADCASTSSMasked512,
+ ssa.OpAMD64VBROADCASTSDMasked512,
+ ssa.OpAMD64VPBROADCASTBMasked512,
+ ssa.OpAMD64VPBROADCASTWMasked512,
+ ssa.OpAMD64VPBROADCASTDMasked512,
+ ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VRNDSCALEPSMasked128,
ssa.OpAMD64VRNDSCALEPSMasked256,
ssa.OpAMD64VRNDSCALEPSMasked512,
(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
+(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast128MaskedFloat32x4 x mask) => (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast128MaskedFloat64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast128MaskedInt8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Broadcast128MaskedInt16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Broadcast128MaskedInt32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast128MaskedInt64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast128MaskedUint8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Broadcast128MaskedUint16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Broadcast128MaskedUint32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast128MaskedUint64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
+(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
+(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast256MaskedFloat32x4 x mask) => (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast256MaskedFloat64x2 x mask) => (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast256MaskedInt8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Broadcast256MaskedInt16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Broadcast256MaskedInt32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast256MaskedInt64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast256MaskedUint8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Broadcast256MaskedUint16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Broadcast256MaskedUint32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast256MaskedUint64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
+(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
+(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
+(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
+(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast512MaskedFloat32x4 x mask) => (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast512MaskedFloat64x2 x mask) => (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast512MaskedInt8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Broadcast512MaskedInt16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Broadcast512MaskedInt32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast512MaskedInt64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(Broadcast512MaskedUint8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(Broadcast512MaskedUint16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(Broadcast512MaskedUint32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(Broadcast512MaskedUint64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(SetElemFloat32x4 ...) => (VPINSRD128 ...)
+(SetElemFloat64x2 ...) => (VPINSRQ128 ...)
(SetElemInt8x16 ...) => (VPINSRB128 ...)
(SetElemInt16x8 ...) => (VPINSRW128 ...)
(SetElemInt32x4 ...) => (VPINSRD128 ...)
{name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VBROADCASTSDMasked512", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VBROADCASTSS128", argLength: 1, reg: v11, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VBROADCASTSS256", argLength: 1, reg: v11, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VBROADCASTSS512", argLength: 1, reg: w11, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VBROADCASTSSMasked128", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VBROADCASTSSMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VBROADCASTSSMasked512", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VCOMPRESSPDMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VCOMPRESSPDMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VCOMPRESSPDMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBLENDMWMasked512", argLength: 3, reg: w2kw, asm: "VPBLENDMW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBLENDVB128", argLength: 3, reg: v31, asm: "VPBLENDVB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBLENDVB256", argLength: 3, reg: v31, asm: "VPBLENDVB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTB128", argLength: 1, reg: v11, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTB256", argLength: 1, reg: v11, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTB512", argLength: 1, reg: w11, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPBROADCASTBMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTBMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTBMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPBROADCASTD128", argLength: 1, reg: v11, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTD256", argLength: 1, reg: v11, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTD512", argLength: 1, reg: w11, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPBROADCASTDMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTDMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTDMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPBROADCASTQ128", argLength: 1, reg: v11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTQ256", argLength: 1, reg: v11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTQ512", argLength: 1, reg: w11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPBROADCASTQMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTQMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTQMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPBROADCASTW128", argLength: 1, reg: v11, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTW256", argLength: 1, reg: v11, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTW512", argLength: 1, reg: w11, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPBROADCASTWMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPBROADCASTWMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPBROADCASTWMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCMPEQB128", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQB256", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCMPEQB512", argLength: 2, reg: w2k, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
- {name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
- {name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRQ128", argLength: 2, reg: vgpv, asm: "VPINSRQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VINSERTF128256", argLength: 2, reg: v21, asm: "VINSERTF128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VINSERTF64X4512", argLength: 2, reg: w21, asm: "VINSERTF64X4", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "AverageUint16x8", argLength: 2, commutative: true},
{name: "AverageUint16x16", argLength: 2, commutative: true},
{name: "AverageUint16x32", argLength: 2, commutative: true},
+ {name: "Broadcast128Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast128Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast128Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast128Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast128Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast128Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast128MaskedFloat32x4", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedFloat64x2", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedInt8x16", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedInt16x8", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedInt32x4", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedInt64x2", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedUint8x16", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedUint16x8", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedUint32x4", argLength: 2, commutative: false},
+ {name: "Broadcast128MaskedUint64x2", argLength: 2, commutative: false},
+ {name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
+ {name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast256Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast256Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast256Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast256Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast256Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast256Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast256MaskedFloat32x4", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedFloat64x2", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedInt8x16", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedInt16x8", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedInt32x4", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedInt64x2", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedUint8x16", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedUint16x8", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedUint32x4", argLength: 2, commutative: false},
+ {name: "Broadcast256MaskedUint64x2", argLength: 2, commutative: false},
+ {name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
+ {name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
+ {name: "Broadcast512Float32x4", argLength: 1, commutative: false},
+ {name: "Broadcast512Float64x2", argLength: 1, commutative: false},
+ {name: "Broadcast512Int8x16", argLength: 1, commutative: false},
+ {name: "Broadcast512Int16x8", argLength: 1, commutative: false},
+ {name: "Broadcast512Int32x4", argLength: 1, commutative: false},
+ {name: "Broadcast512Int64x2", argLength: 1, commutative: false},
+ {name: "Broadcast512MaskedFloat32x4", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedFloat64x2", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedInt8x16", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedInt16x8", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedInt32x4", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedInt64x2", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedUint8x16", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedUint16x8", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedUint32x4", argLength: 2, commutative: false},
+ {name: "Broadcast512MaskedUint64x2", argLength: 2, commutative: false},
+ {name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
+ {name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
+ {name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
+ {name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
{name: "CeilFloat32x4", argLength: 1, commutative: false},
{name: "CeilFloat32x8", argLength: 1, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false},
{name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
{name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
{name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "UInt8"},
OpAMD64VADDSUBPD256
OpAMD64VADDSUBPS128
OpAMD64VADDSUBPS256
+ OpAMD64VBROADCASTSD256
+ OpAMD64VBROADCASTSD512
+ OpAMD64VBROADCASTSDMasked256
+ OpAMD64VBROADCASTSDMasked512
+ OpAMD64VBROADCASTSS128
+ OpAMD64VBROADCASTSS256
+ OpAMD64VBROADCASTSS512
+ OpAMD64VBROADCASTSSMasked128
+ OpAMD64VBROADCASTSSMasked256
+ OpAMD64VBROADCASTSSMasked512
OpAMD64VCOMPRESSPDMasked128
OpAMD64VCOMPRESSPDMasked256
OpAMD64VCOMPRESSPDMasked512
OpAMD64VPBLENDMWMasked512
OpAMD64VPBLENDVB128
OpAMD64VPBLENDVB256
+ OpAMD64VPBROADCASTB128
+ OpAMD64VPBROADCASTB256
+ OpAMD64VPBROADCASTB512
+ OpAMD64VPBROADCASTBMasked128
+ OpAMD64VPBROADCASTBMasked256
+ OpAMD64VPBROADCASTBMasked512
+ OpAMD64VPBROADCASTD128
+ OpAMD64VPBROADCASTD256
+ OpAMD64VPBROADCASTD512
+ OpAMD64VPBROADCASTDMasked128
+ OpAMD64VPBROADCASTDMasked256
+ OpAMD64VPBROADCASTDMasked512
+ OpAMD64VPBROADCASTQ128
+ OpAMD64VPBROADCASTQ256
+ OpAMD64VPBROADCASTQ512
+ OpAMD64VPBROADCASTQMasked128
+ OpAMD64VPBROADCASTQMasked256
+ OpAMD64VPBROADCASTQMasked512
+ OpAMD64VPBROADCASTW128
+ OpAMD64VPBROADCASTW256
+ OpAMD64VPBROADCASTW512
+ OpAMD64VPBROADCASTWMasked128
+ OpAMD64VPBROADCASTWMasked256
+ OpAMD64VPBROADCASTWMasked512
OpAMD64VPCMPEQB128
OpAMD64VPCMPEQB256
OpAMD64VPCMPEQB512
OpAMD64VPRORQMasked128
OpAMD64VPRORQMasked256
OpAMD64VPRORQMasked512
- OpAMD64VPINSRB128
- OpAMD64VPINSRW128
OpAMD64VPINSRD128
OpAMD64VPINSRQ128
+ OpAMD64VPINSRB128
+ OpAMD64VPINSRW128
OpAMD64VINSERTF128256
OpAMD64VINSERTF64X4512
OpAMD64VINSERTI128256
OpAverageUint16x8
OpAverageUint16x16
OpAverageUint16x32
+ OpBroadcast128Float32x4
+ OpBroadcast128Float64x2
+ OpBroadcast128Int8x16
+ OpBroadcast128Int16x8
+ OpBroadcast128Int32x4
+ OpBroadcast128Int64x2
+ OpBroadcast128MaskedFloat32x4
+ OpBroadcast128MaskedFloat64x2
+ OpBroadcast128MaskedInt8x16
+ OpBroadcast128MaskedInt16x8
+ OpBroadcast128MaskedInt32x4
+ OpBroadcast128MaskedInt64x2
+ OpBroadcast128MaskedUint8x16
+ OpBroadcast128MaskedUint16x8
+ OpBroadcast128MaskedUint32x4
+ OpBroadcast128MaskedUint64x2
+ OpBroadcast128Uint8x16
+ OpBroadcast128Uint16x8
+ OpBroadcast128Uint32x4
+ OpBroadcast128Uint64x2
+ OpBroadcast256Float32x4
+ OpBroadcast256Float64x2
+ OpBroadcast256Int8x16
+ OpBroadcast256Int16x8
+ OpBroadcast256Int32x4
+ OpBroadcast256Int64x2
+ OpBroadcast256MaskedFloat32x4
+ OpBroadcast256MaskedFloat64x2
+ OpBroadcast256MaskedInt8x16
+ OpBroadcast256MaskedInt16x8
+ OpBroadcast256MaskedInt32x4
+ OpBroadcast256MaskedInt64x2
+ OpBroadcast256MaskedUint8x16
+ OpBroadcast256MaskedUint16x8
+ OpBroadcast256MaskedUint32x4
+ OpBroadcast256MaskedUint64x2
+ OpBroadcast256Uint8x16
+ OpBroadcast256Uint16x8
+ OpBroadcast256Uint32x4
+ OpBroadcast256Uint64x2
+ OpBroadcast512Float32x4
+ OpBroadcast512Float64x2
+ OpBroadcast512Int8x16
+ OpBroadcast512Int16x8
+ OpBroadcast512Int32x4
+ OpBroadcast512Int64x2
+ OpBroadcast512MaskedFloat32x4
+ OpBroadcast512MaskedFloat64x2
+ OpBroadcast512MaskedInt8x16
+ OpBroadcast512MaskedInt16x8
+ OpBroadcast512MaskedInt32x4
+ OpBroadcast512MaskedInt64x2
+ OpBroadcast512MaskedUint8x16
+ OpBroadcast512MaskedUint16x8
+ OpBroadcast512MaskedUint32x4
+ OpBroadcast512MaskedUint64x2
+ OpBroadcast512Uint8x16
+ OpBroadcast512Uint16x8
+ OpBroadcast512Uint32x4
+ OpBroadcast512Uint64x2
OpCeilFloat32x4
OpCeilFloat32x8
OpCeilFloat64x2
OpRoundToEvenScaledResidueMaskedFloat64x2
OpRoundToEvenScaledResidueMaskedFloat64x4
OpRoundToEvenScaledResidueMaskedFloat64x8
+ OpSetElemFloat32x4
+ OpSetElemFloat64x2
OpSetElemInt8x16
OpSetElemInt16x8
OpSetElemInt32x4
},
},
},
+ {
+ name: "VBROADCASTSD256",
+ argLen: 1,
+ asm: x86.AVBROADCASTSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSD512",
+ argLen: 1,
+ asm: x86.AVBROADCASTSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSDMasked256",
+ argLen: 2,
+ asm: x86.AVBROADCASTSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSDMasked512",
+ argLen: 2,
+ asm: x86.AVBROADCASTSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSS128",
+ argLen: 1,
+ asm: x86.AVBROADCASTSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSS256",
+ argLen: 1,
+ asm: x86.AVBROADCASTSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSS512",
+ argLen: 1,
+ asm: x86.AVBROADCASTSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSSMasked128",
+ argLen: 2,
+ asm: x86.AVBROADCASTSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSSMasked256",
+ argLen: 2,
+ asm: x86.AVBROADCASTSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VBROADCASTSSMasked512",
+ argLen: 2,
+ asm: x86.AVBROADCASTSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
{
name: "VCOMPRESSPDMasked128",
argLen: 2,
},
},
},
+ {
+ name: "VPBROADCASTB128",
+ argLen: 1,
+ asm: x86.AVPBROADCASTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTB256",
+ argLen: 1,
+ asm: x86.AVPBROADCASTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTB512",
+ argLen: 1,
+ asm: x86.AVPBROADCASTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTBMasked128",
+ argLen: 2,
+ asm: x86.AVPBROADCASTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTBMasked256",
+ argLen: 2,
+ asm: x86.AVPBROADCASTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTBMasked512",
+ argLen: 2,
+ asm: x86.AVPBROADCASTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTD128",
+ argLen: 1,
+ asm: x86.AVPBROADCASTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTD256",
+ argLen: 1,
+ asm: x86.AVPBROADCASTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTD512",
+ argLen: 1,
+ asm: x86.AVPBROADCASTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTDMasked128",
+ argLen: 2,
+ asm: x86.AVPBROADCASTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTDMasked256",
+ argLen: 2,
+ asm: x86.AVPBROADCASTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTDMasked512",
+ argLen: 2,
+ asm: x86.AVPBROADCASTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTQ128",
+ argLen: 1,
+ asm: x86.AVPBROADCASTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTQ256",
+ argLen: 1,
+ asm: x86.AVPBROADCASTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTQ512",
+ argLen: 1,
+ asm: x86.AVPBROADCASTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTQMasked128",
+ argLen: 2,
+ asm: x86.AVPBROADCASTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTQMasked256",
+ argLen: 2,
+ asm: x86.AVPBROADCASTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTQMasked512",
+ argLen: 2,
+ asm: x86.AVPBROADCASTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTW128",
+ argLen: 1,
+ asm: x86.AVPBROADCASTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTW256",
+ argLen: 1,
+ asm: x86.AVPBROADCASTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTW512",
+ argLen: 1,
+ asm: x86.AVPBROADCASTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTWMasked128",
+ argLen: 2,
+ asm: x86.AVPBROADCASTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTWMasked256",
+ argLen: 2,
+ asm: x86.AVPBROADCASTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPBROADCASTWMasked512",
+ argLen: 2,
+ asm: x86.AVPBROADCASTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPCMPEQB128",
argLen: 2,
},
},
{
- name: "VPINSRB128",
+ name: "VPINSRD128",
auxType: auxUInt8,
argLen: 2,
- asm: x86.AVPINSRB,
+ asm: x86.AVPINSRD,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
{
- name: "VPINSRW128",
+ name: "VPINSRQ128",
auxType: auxUInt8,
argLen: 2,
- asm: x86.AVPINSRW,
+ asm: x86.AVPINSRQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
{
- name: "VPINSRD128",
+ name: "VPINSRB128",
auxType: auxUInt8,
argLen: 2,
- asm: x86.AVPINSRD,
+ asm: x86.AVPINSRB,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
{
- name: "VPINSRQ128",
+ name: "VPINSRW128",
auxType: auxUInt8,
argLen: 2,
- asm: x86.AVPINSRQ,
+ asm: x86.AVPINSRW,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
commutative: true,
generic: true,
},
+ {
+ name: "Broadcast128Float32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Float64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Int8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Int16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Int32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Int64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedFloat32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedFloat64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedInt64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedUint32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128MaskedUint64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Uint8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Uint16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Uint32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast128Uint64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Float32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Float64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Int8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Int16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Int32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Int64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedFloat32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedFloat64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedInt64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedUint32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256MaskedUint64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Uint8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Uint16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Uint32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast256Uint64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Float32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Float64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Int8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Int16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Int32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Int64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedFloat32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedFloat64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedInt64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedUint32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512MaskedUint64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Uint8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Uint16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Uint32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "Broadcast512Uint64x2",
+ argLen: 1,
+ generic: true,
+ },
{
name: "CeilFloat32x4",
argLen: 1,
argLen: 2,
generic: true,
},
+ {
+ name: "SetElemFloat32x4",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SetElemFloat64x2",
+ auxType: auxUInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "SetElemInt8x16",
auxType: auxUInt8,
return rewriteValueAMD64_OpBitLen64(v)
case OpBitLen8:
return rewriteValueAMD64_OpBitLen8(v)
+ case OpBroadcast128Float32x4:
+ v.Op = OpAMD64VBROADCASTSS128
+ return true
+ case OpBroadcast128Float64x2:
+ v.Op = OpAMD64VPBROADCASTQ128
+ return true
+ case OpBroadcast128Int16x8:
+ v.Op = OpAMD64VPBROADCASTW128
+ return true
+ case OpBroadcast128Int32x4:
+ v.Op = OpAMD64VPBROADCASTD128
+ return true
+ case OpBroadcast128Int64x2:
+ v.Op = OpAMD64VPBROADCASTQ128
+ return true
+ case OpBroadcast128Int8x16:
+ v.Op = OpAMD64VPBROADCASTB128
+ return true
+ case OpBroadcast128MaskedFloat32x4:
+ return rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v)
+ case OpBroadcast128MaskedFloat64x2:
+ return rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v)
+ case OpBroadcast128MaskedInt16x8:
+ return rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v)
+ case OpBroadcast128MaskedInt32x4:
+ return rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v)
+ case OpBroadcast128MaskedInt64x2:
+ return rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v)
+ case OpBroadcast128MaskedInt8x16:
+ return rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v)
+ case OpBroadcast128MaskedUint16x8:
+ return rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v)
+ case OpBroadcast128MaskedUint32x4:
+ return rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v)
+ case OpBroadcast128MaskedUint64x2:
+ return rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v)
+ case OpBroadcast128MaskedUint8x16:
+ return rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v)
+ case OpBroadcast128Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW128
+ return true
+ case OpBroadcast128Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD128
+ return true
+ case OpBroadcast128Uint64x2:
+ v.Op = OpAMD64VPBROADCASTQ128
+ return true
+ case OpBroadcast128Uint8x16:
+ v.Op = OpAMD64VPBROADCASTB128
+ return true
+ case OpBroadcast256Float32x4:
+ v.Op = OpAMD64VBROADCASTSS256
+ return true
+ case OpBroadcast256Float64x2:
+ v.Op = OpAMD64VBROADCASTSD256
+ return true
+ case OpBroadcast256Int16x8:
+ v.Op = OpAMD64VPBROADCASTW256
+ return true
+ case OpBroadcast256Int32x4:
+ v.Op = OpAMD64VPBROADCASTD256
+ return true
+ case OpBroadcast256Int64x2:
+ v.Op = OpAMD64VPBROADCASTQ256
+ return true
+ case OpBroadcast256Int8x16:
+ v.Op = OpAMD64VPBROADCASTB256
+ return true
+ case OpBroadcast256MaskedFloat32x4:
+ return rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v)
+ case OpBroadcast256MaskedFloat64x2:
+ return rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v)
+ case OpBroadcast256MaskedInt16x8:
+ return rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v)
+ case OpBroadcast256MaskedInt32x4:
+ return rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v)
+ case OpBroadcast256MaskedInt64x2:
+ return rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v)
+ case OpBroadcast256MaskedInt8x16:
+ return rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v)
+ case OpBroadcast256MaskedUint16x8:
+ return rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v)
+ case OpBroadcast256MaskedUint32x4:
+ return rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v)
+ case OpBroadcast256MaskedUint64x2:
+ return rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v)
+ case OpBroadcast256MaskedUint8x16:
+ return rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v)
+ case OpBroadcast256Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW256
+ return true
+ case OpBroadcast256Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD256
+ return true
+ case OpBroadcast256Uint64x2:
+ v.Op = OpAMD64VPBROADCASTQ256
+ return true
+ case OpBroadcast256Uint8x16:
+ v.Op = OpAMD64VPBROADCASTB256
+ return true
+ case OpBroadcast512Float32x4:
+ v.Op = OpAMD64VBROADCASTSS512
+ return true
+ case OpBroadcast512Float64x2:
+ v.Op = OpAMD64VBROADCASTSD512
+ return true
+ case OpBroadcast512Int16x8:
+ v.Op = OpAMD64VPBROADCASTW512
+ return true
+ case OpBroadcast512Int32x4:
+ v.Op = OpAMD64VPBROADCASTD512
+ return true
+ case OpBroadcast512Int64x2:
+ v.Op = OpAMD64VPBROADCASTQ512
+ return true
+ case OpBroadcast512Int8x16:
+ v.Op = OpAMD64VPBROADCASTB512
+ return true
+ case OpBroadcast512MaskedFloat32x4:
+ return rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v)
+ case OpBroadcast512MaskedFloat64x2:
+ return rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v)
+ case OpBroadcast512MaskedInt16x8:
+ return rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v)
+ case OpBroadcast512MaskedInt32x4:
+ return rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v)
+ case OpBroadcast512MaskedInt64x2:
+ return rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v)
+ case OpBroadcast512MaskedInt8x16:
+ return rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v)
+ case OpBroadcast512MaskedUint16x8:
+ return rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v)
+ case OpBroadcast512MaskedUint32x4:
+ return rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v)
+ case OpBroadcast512MaskedUint64x2:
+ return rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v)
+ case OpBroadcast512MaskedUint8x16:
+ return rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v)
+ case OpBroadcast512Uint16x8:
+ v.Op = OpAMD64VPBROADCASTW512
+ return true
+ case OpBroadcast512Uint32x4:
+ v.Op = OpAMD64VPBROADCASTD512
+ return true
+ case OpBroadcast512Uint64x2:
+ v.Op = OpAMD64VPBROADCASTQ512
+ return true
+ case OpBroadcast512Uint8x16:
+ v.Op = OpAMD64VPBROADCASTB512
+ return true
case OpBswap16:
return rewriteValueAMD64_OpBswap16(v)
case OpBswap32:
return rewriteValueAMD64_OpSelect1(v)
case OpSelectN:
return rewriteValueAMD64_OpSelectN(v)
+ case OpSetElemFloat32x4:
+ v.Op = OpAMD64VPINSRD128
+ return true
+ case OpSetElemFloat64x2:
+ v.Op = OpAMD64VPINSRQ128
+ return true
case OpSetElemInt16x8:
v.Op = OpAMD64VPINSRW128
return true
}
return false
}
+// The ten functions below lower the generic Broadcast128Masked* ops to
+// AMD64 masked broadcast machine ops. Each follows the same two-step
+// pattern: the vector-shaped mask operand (arg 1) is converted to a
+// K-register mask with the element-width-matching VPMOVVec<W>x<N>ToM op,
+// and the value is then re-emitted as the masked broadcast with
+// (x, kmask) as its two arguments.
+// NOTE(review): the match:/result: comment style suggests these are
+// machine-generated from rewrite rules; prefer regenerating from the
+// rules over hand-editing — confirm against the generator.
+func rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedFloat32x4 x mask)
+ // result: (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VBROADCASTSSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+// NOTE(review): the 128-bit float64 case lowers to the integer
+// VPBROADCASTQ form rather than VBROADCASTSD — presumably because
+// VBROADCASTSD has no xmm-destination register form; confirm against
+// the instruction set reference.
+func rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedFloat64x2 x mask)
+ // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedInt16x8 x mask)
+ // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedInt32x4 x mask)
+ // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedInt64x2 x mask)
+ // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedInt8x16 x mask)
+ // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+// The unsigned variants lower to the same machine ops as the signed
+// ones of equal element width — broadcast is sign-agnostic.
+func rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedUint16x8 x mask)
+ // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedUint32x4 x mask)
+ // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedUint64x2 x mask)
+ // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast128MaskedUint8x16 x mask)
+ // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+// 256-bit masked broadcast lowerings. Same pattern as the 128-bit
+// group: convert the 128-bit vector mask to a K register via the
+// matching VPMOVVec<W>ToM op, then emit the Masked256 broadcast with
+// (x, kmask). Note the VPMOVVec*ToM ops still use 128-bit lane shapes
+// (e.g. 32x4) because the broadcast source is a 128-bit vector.
+// NOTE(review): match:/result: comment style indicates generated
+// rewrite-rule code; regenerate rather than hand-edit — confirm.
+func rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedFloat32x4 x mask)
+ // result: (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VBROADCASTSSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+// Unlike the 128-bit case, float64 here uses VBROADCASTSD — the
+// ymm-destination form exists for this width.
+func rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedFloat64x2 x mask)
+ // result: (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VBROADCASTSDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedInt16x8 x mask)
+ // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedInt32x4 x mask)
+ // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedInt64x2 x mask)
+ // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedInt8x16 x mask)
+ // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+// Unsigned variants share the signed lowerings — broadcast is
+// sign-agnostic.
+func rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedUint16x8 x mask)
+ // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedUint32x4 x mask)
+ // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedUint64x2 x mask)
+ // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast256MaskedUint8x16 x mask)
+ // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+// 512-bit masked broadcast lowerings — the same pattern as the 128-
+// and 256-bit groups: the 128-bit vector mask operand is converted to
+// a K register via the element-width-matching VPMOVVec<W>ToM op, then
+// the Masked512 broadcast is emitted with (x, kmask).
+// NOTE(review): match:/result: comment style indicates generated
+// rewrite-rule code; regenerate rather than hand-edit — confirm.
+func rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedFloat32x4 x mask)
+ // result: (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VBROADCASTSSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedFloat64x2 x mask)
+ // result: (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VBROADCASTSDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedInt16x8 x mask)
+ // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedInt32x4 x mask)
+ // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedInt64x2 x mask)
+ // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedInt8x16 x mask)
+ // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+// Unsigned variants share the signed lowerings — broadcast is
+// sign-agnostic.
+func rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedUint16x8 x mask)
+ // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedUint32x4 x mask)
+ // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedUint64x2 x mask)
+ // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Broadcast512MaskedUint8x16 x mask)
+ // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPBROADCASTBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
func rewriteValueAMD64_OpBswap16(v *Value) bool {
v_0 := v.Args[0]
// match: (Bswap16 x)
addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint32x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint64x2, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt64x2, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint16x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint32x4, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
+ addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64)
floats: []int{32},
}
+// templateData is the data handed to the code-generation templates; one
+// instance describes a single SIMD vector shape (element type × lane count).
+type templateData struct {
+	Vec    string // the type of the vector, e.g. Float32x4
+	AOrAn  string // for documentation, the article "a" or "an"
+	Width  int    // the bit width of the element type, e.g. 32
+	Vwidth int    // the bit width of the whole vector (Width*Count), e.g. 128
+	Count  int    // the number of elements, e.g. 4
+	WxC    string // the width-by-type string, e.g., "32x4"
+	BxC    string // as if bytes, in the proper count, e.g., "8x16" (W==8)
+	Base   string // the capitalized Base Type of the vector, e.g., "Float"
+	Type   string // the element type, e.g. "float32"
+	OxFF   string // a mask for the lowest 'count' bits
+}
+
+// As128BitVec returns the name of the 128-bit vector type with the same
+// element type as t, e.g. "Float32x4" for any Float32 shape.
+func (t templateData) As128BitVec() string {
+	return fmt.Sprintf("%s%dx%d", t.Base, t.Width, 128/t.Width)
+}
+
func oneTemplate(t *template.Template, baseType string, width, count int, out io.Writer) {
b := width * count
if b < 128 || b > 512 {
aOrAn = "an"
}
oxFF := fmt.Sprintf("0x%x", uint64((1<<count)-1))
- t.Execute(out, struct {
- Vec string // the type of the vector, e.g. Float32x4
- AOrAn string // for documentation, the article "a" or "an"
- Width int // the bit width of the element type, e.g. 32
- Count int // the number of elements, e.g. 4
- WxC string // the width-by-type string, e.g., "32x4"
- BxC string // as if bytes, in the proper count, e.g., "8x16" (W==8)
- Base string // the capitalized Base Type of the vector, e.g., "Float"
- Type string // the element type, e.g. "float32"
- OxFF string // a mask for the lowest 'count' bits
- }{
- Vec: vType,
- AOrAn: aOrAn,
- Width: width,
- Count: count,
- WxC: wxc,
- BxC: bxc,
- Base: BaseType,
- Type: eType,
- OxFF: oxFF,
+ t.Execute(out, templateData{
+ Vec: vType,
+ AOrAn: aOrAn,
+ Width: width,
+ Vwidth: b,
+ Count: count,
+ WxC: wxc,
+ BxC: bxc,
+ Base: BaseType,
+ Type: eType,
+ OxFF: oxFF,
})
}
var unsafePATemplate = templateOf("unsafe PA helper", `
// pa{{.Vec}} returns a type-unsafe pointer to array that can
-// only be used with partial load/store operations that only
+// only be used with partial load/store operations that only
// access the known-safe portions of the array.
func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} {
return (*[{{.Count}}]{{.Type}})(unsafe.Pointer(&s[0]))
// Merge returns x but with elements set to y where mask is false.
func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
-{{- if eq .BxC .WxC }}
+{{- if eq .BxC .WxC -}}
im := mask.AsInt{{.BxC}}()
{{- else}}
im := mask.AsInt{{.WxC}}().AsInt{{.BxC}}()
}
`)
+// CPUfeatureBC returns the CPU feature name documented for the emulated
+// Broadcast helper of this vector shape: AVX2 for 128- and 256-bit vectors,
+// AVX512BW for 512-bit vectors of 8- or 16-bit elements (byte/word
+// broadcasts need the BW extension), and AVX512F for the other 512-bit
+// shapes. It panics on an unexpected vector width.
+func (t templateData) CPUfeatureBC() string {
+	switch t.Vwidth {
+	case 128, 256:
+		return "AVX2"
+	case 512:
+		if t.Width <= 16 {
+			return "AVX512BW"
+		}
+		return "AVX512F"
+	}
+	panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
+}
+
+// broadcastTemplate generates the package-level BroadcastXxx helper for each
+// vector shape: it stores x into element 0 of a zeroed 128-bit vector and
+// then splats element 0 across the full-width vector with the matching
+// BroadcastN method.
+var broadcastTemplate = templateOf("Broadcast functions", `
+// Broadcast{{.Vec}} returns a vector with the input
+// x assigned to all elements of the output.
+//
+// Emulated, CPU Feature {{.CPUfeatureBC}}
+func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} {
+	var z {{.As128BitVec }}
+	return z.SetElem(0, x).Broadcast{{.Vwidth}}()
+}
+`)
+
func main() {
sl := flag.String("sl", "slice_amd64.go", "file name for slice operations")
ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers")
avx2SmallLoadSlicePartTemplate,
avx2MaskedTemplate,
avx512MaskedTemplate,
+ broadcastTemplate,
)
}
if *ush != "" {
// Asm: VPAVGW, CPU Feature: AVX512
func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
+/* Broadcast128 */
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VBROADCASTSS, CPU Feature: AVX2
+func (x Float32x4) Broadcast128() Float32x4
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Float64x2) Broadcast128() Float64x2
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast128() Int8x16
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Int16x8) Broadcast128() Int16x8
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX2
+func (x Int32x4) Broadcast128() Int32x4
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Int64x2) Broadcast128() Int64x2
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast128() Uint8x16
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Uint16x8) Broadcast128() Uint16x8
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX2
+func (x Uint32x4) Broadcast128() Uint32x4
+
+// Broadcast128 copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Uint64x2) Broadcast128() Uint64x2
+
+/* Broadcast128Masked */
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VBROADCASTSS, CPU Feature: AVX512
+func (x Float32x4) Broadcast128Masked(mask Mask32x4) Float32x4
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Float64x2) Broadcast128Masked(mask Mask64x2) Float64x2
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Int8x16) Broadcast128Masked(mask Mask8x16) Int8x16
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Int16x8) Broadcast128Masked(mask Mask16x8) Int16x8
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Int32x4) Broadcast128Masked(mask Mask32x4) Int32x4
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Int64x2) Broadcast128Masked(mask Mask64x2) Int64x2
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Uint8x16) Broadcast128Masked(mask Mask8x16) Uint8x16
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Uint16x8) Broadcast128Masked(mask Mask16x8) Uint16x8
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Uint32x4) Broadcast128Masked(mask Mask32x4) Uint32x4
+
+// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
+// the 128-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Uint64x2) Broadcast128Masked(mask Mask64x2) Uint64x2
+
+/* Broadcast256 */
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VBROADCASTSS, CPU Feature: AVX2
+func (x Float32x4) Broadcast256() Float32x8
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VBROADCASTSD, CPU Feature: AVX2
+func (x Float64x2) Broadcast256() Float64x4
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast256() Int8x32
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Int16x8) Broadcast256() Int16x16
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX2
+func (x Int32x4) Broadcast256() Int32x8
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Int64x2) Broadcast256() Int64x4
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast256() Uint8x32
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Uint16x8) Broadcast256() Uint16x16
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX2
+func (x Uint32x4) Broadcast256() Uint32x8
+
+// Broadcast256 copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Uint64x2) Broadcast256() Uint64x4
+
+/* Broadcast256Masked */
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VBROADCASTSS, CPU Feature: AVX512
+func (x Float32x4) Broadcast256Masked(mask Mask32x4) Float32x8
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VBROADCASTSD, CPU Feature: AVX512
+func (x Float64x2) Broadcast256Masked(mask Mask64x2) Float64x4
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Int8x16) Broadcast256Masked(mask Mask8x16) Int8x32
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Int16x8) Broadcast256Masked(mask Mask16x8) Int16x16
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Int32x4) Broadcast256Masked(mask Mask32x4) Int32x8
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Int64x2) Broadcast256Masked(mask Mask64x2) Int64x4
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Uint8x16) Broadcast256Masked(mask Mask8x16) Uint8x32
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Uint16x8) Broadcast256Masked(mask Mask16x8) Uint16x16
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Uint32x4) Broadcast256Masked(mask Mask32x4) Uint32x8
+
+// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
+// the 256-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Uint64x2) Broadcast256Masked(mask Mask64x2) Uint64x4
+
+/* Broadcast512 */
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VBROADCASTSS, CPU Feature: AVX512
+func (x Float32x4) Broadcast512() Float32x16
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VBROADCASTSD, CPU Feature: AVX512
+func (x Float64x2) Broadcast512() Float64x8
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Int8x16) Broadcast512() Int8x64
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Int16x8) Broadcast512() Int16x32
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Int32x4) Broadcast512() Int32x16
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Int64x2) Broadcast512() Int64x8
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Uint8x16) Broadcast512() Uint8x64
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Uint16x8) Broadcast512() Uint16x32
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Uint32x4) Broadcast512() Uint32x16
+
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Uint64x2) Broadcast512() Uint64x8
+
+/* Broadcast512Masked */
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VBROADCASTSS, CPU Feature: AVX512
+func (x Float32x4) Broadcast512Masked(mask Mask32x4) Float32x16
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VBROADCASTSD, CPU Feature: AVX512
+func (x Float64x2) Broadcast512Masked(mask Mask64x2) Float64x8
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Int8x16) Broadcast512Masked(mask Mask8x16) Int8x64
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Int16x8) Broadcast512Masked(mask Mask16x8) Int16x32
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Int32x4) Broadcast512Masked(mask Mask32x4) Int32x16
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Int64x2) Broadcast512Masked(mask Mask64x2) Int64x8
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Uint8x16) Broadcast512Masked(mask Mask8x16) Uint8x64
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Uint16x8) Broadcast512Masked(mask Mask16x8) Uint16x32
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Uint32x4) Broadcast512Masked(mask Mask32x4) Uint32x16
+
+// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Uint64x2) Broadcast512Masked(mask Mask64x2) Uint64x8
+
/* Ceil */
// Ceil rounds elements up to the nearest integer.
/* SetElem */
+// SetElem sets a single constant-indexed element's value.
+//
+// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
+//
+// Asm: VPINSRD, CPU Feature: AVX
+func (x Float32x4) SetElem(index uint8, y float32) Float32x4
+
+// SetElem sets a single constant-indexed element's value.
+//
+// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
+//
+// Asm: VPINSRQ, CPU Feature: AVX
+func (x Float64x2) SetElem(index uint8, y float64) Float64x2
+
// SetElem sets a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
}
}
}
+
+// TestBroadcastUint32x4 checks that BroadcastUint32x4 replicates its scalar
+// argument into all four lanes of the stored result.
+func TestBroadcastUint32x4(t *testing.T) {
+	// Explicit capacity equal to length was redundant (gosimple S1019).
+	got := make([]uint32, 4)
+	simd.BroadcastUint32x4(123456789).StoreSlice(got)
+	checkSlices(t, got, []uint32{123456789, 123456789, 123456789, 123456789})
+}
+
+// TestBroadcastFloat32x8 checks that BroadcastFloat32x8 replicates its scalar
+// argument into all eight lanes of the stored result.
+//
+// NOTE: 123456789 is not exactly representable in float32; the argument and
+// each expected element round identically (to 1.23456792e8), so the exact
+// comparison still holds.
+func TestBroadcastFloat32x8(t *testing.T) {
+	// Explicit capacity equal to length was redundant (gosimple S1019).
+	got := make([]float32, 8)
+	simd.BroadcastFloat32x8(123456789).StoreSlice(got)
+	want := make([]float32, 8)
+	for i := range want {
+		want[i] = 123456789
+	}
+	checkSlices(t, got, want)
+}
iy := y.AsInt64x8()
return iy.blendMasked(ix, mask).AsFloat64x8()
}
+
+// BroadcastInt8x16 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt8x16(x int8) Int8x16 {
+	var seed Int8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt16x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt16x8(x int16) Int16x8 {
+	var seed Int16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt32x4 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt32x4(x int32) Int32x4 {
+	var seed Int32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt64x2 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt64x2(x int64) Int64x2 {
+	var seed Int64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint8x16 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint8x16(x uint8) Uint8x16 {
+	var seed Uint8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint16x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint16x8(x uint16) Uint16x8 {
+	var seed Uint16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint32x4 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint32x4(x uint32) Uint32x4 {
+	var seed Uint32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint64x2 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint64x2(x uint64) Uint64x2 {
+	var seed Uint64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastFloat32x4 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat32x4(x float32) Float32x4 {
+	var seed Float32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastFloat64x2 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat64x2(x float64) Float64x2 {
+	var seed Float64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt8x32 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt8x32(x int8) Int8x32 {
+	var seed Int8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt16x16 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt16x16(x int16) Int16x16 {
+	var seed Int16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt32x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt32x8(x int32) Int32x8 {
+	var seed Int32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt64x4 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt64x4(x int64) Int64x4 {
+	var seed Int64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint8x32 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint8x32(x uint8) Uint8x32 {
+	var seed Uint8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint16x16 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint16x16(x uint16) Uint16x16 {
+	var seed Uint16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint32x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint32x8(x uint32) Uint32x8 {
+	var seed Uint32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint64x4 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint64x4(x uint64) Uint64x4 {
+	var seed Uint64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastFloat32x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat32x8(x float32) Float32x8 {
+	var seed Float32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastFloat64x4 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat64x4(x float64) Float64x4 {
+	var seed Float64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt8x64 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastInt8x64(x int8) Int8x64 {
+	var seed Int8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastInt16x32 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastInt16x32(x int16) Int16x32 {
+	var seed Int16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastInt32x16 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastInt32x16(x int32) Int32x16 {
+	var seed Int32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastInt64x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastInt64x8(x int64) Int64x8 {
+	var seed Int64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint8x64 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastUint8x64(x uint8) Uint8x64 {
+	var seed Uint8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint16x32 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastUint16x32(x uint16) Uint16x32 {
+	var seed Uint16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint32x16 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastUint32x16(x uint32) Uint32x16 {
+	var seed Uint32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint64x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastUint64x8(x uint64) Uint64x8 {
+	var seed Uint64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastFloat32x16 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastFloat32x16(x float32) Float32x16 {
+	var seed Float32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastFloat64x8 returns a vector whose elements all equal x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastFloat64x8(x float64) Float64x8 {
+	var seed Float64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}