This CL is generated by CL 693336.
Change-Id: Ic1712d49fcad0544fa3c19b0249d8bc65b347104
Reviewed-on: https://go-review.googlesource.com/c/go/+/693375
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
ssa.OpAMD64VCVTPS2UDQMasked128,
ssa.OpAMD64VCVTPS2UDQMasked256,
ssa.OpAMD64VCVTPS2UDQMasked512,
+ ssa.OpAMD64VEXPANDPSMasked128,
+ ssa.OpAMD64VEXPANDPSMasked256,
+ ssa.OpAMD64VEXPANDPSMasked512,
+ ssa.OpAMD64VEXPANDPDMasked128,
+ ssa.OpAMD64VEXPANDPDMasked256,
+ ssa.OpAMD64VEXPANDPDMasked512,
+ ssa.OpAMD64VPEXPANDBMasked128,
+ ssa.OpAMD64VPEXPANDBMasked256,
+ ssa.OpAMD64VPEXPANDBMasked512,
+ ssa.OpAMD64VPEXPANDWMasked128,
+ ssa.OpAMD64VPEXPANDWMasked256,
+ ssa.OpAMD64VPEXPANDWMasked512,
+ ssa.OpAMD64VPEXPANDDMasked128,
+ ssa.OpAMD64VPEXPANDDMasked256,
+ ssa.OpAMD64VPEXPANDDMasked512,
+ ssa.OpAMD64VPEXPANDQMasked128,
+ ssa.OpAMD64VPEXPANDQMasked256,
+ ssa.OpAMD64VPEXPANDQMasked512,
ssa.OpAMD64VPOPCNTBMasked128,
ssa.OpAMD64VPOPCNTBMasked256,
ssa.OpAMD64VPOPCNTBMasked512,
ssa.OpAMD64VDIVPDMasked128,
ssa.OpAMD64VDIVPDMasked256,
ssa.OpAMD64VDIVPDMasked512,
+ ssa.OpAMD64VEXPANDPSMasked128,
+ ssa.OpAMD64VEXPANDPSMasked256,
+ ssa.OpAMD64VEXPANDPSMasked512,
+ ssa.OpAMD64VEXPANDPDMasked128,
+ ssa.OpAMD64VEXPANDPDMasked256,
+ ssa.OpAMD64VEXPANDPDMasked512,
+ ssa.OpAMD64VPEXPANDBMasked128,
+ ssa.OpAMD64VPEXPANDBMasked256,
+ ssa.OpAMD64VPEXPANDBMasked512,
+ ssa.OpAMD64VPEXPANDWMasked128,
+ ssa.OpAMD64VPEXPANDWMasked256,
+ ssa.OpAMD64VPEXPANDWMasked512,
+ ssa.OpAMD64VPEXPANDDMasked128,
+ ssa.OpAMD64VPEXPANDDMasked256,
+ ssa.OpAMD64VPEXPANDDMasked512,
+ ssa.OpAMD64VPEXPANDQMasked128,
+ ssa.OpAMD64VPEXPANDQMasked256,
+ ssa.OpAMD64VPEXPANDQMasked512,
ssa.OpAMD64VFMADD213PSMasked128,
ssa.OpAMD64VFMADD213PSMasked256,
ssa.OpAMD64VFMADD213PSMasked512,
(EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
(EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
(EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
+(ExpandFloat32x4 x mask) => (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ExpandFloat32x8 x mask) => (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ExpandFloat32x16 x mask) => (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ExpandFloat64x2 x mask) => (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ExpandFloat64x4 x mask) => (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ExpandFloat64x8 x mask) => (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ExpandInt8x16 x mask) => (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(ExpandInt8x32 x mask) => (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(ExpandInt8x64 x mask) => (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(ExpandInt16x8 x mask) => (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ExpandInt16x16 x mask) => (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ExpandInt16x32 x mask) => (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ExpandInt32x4 x mask) => (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ExpandInt32x8 x mask) => (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ExpandInt32x16 x mask) => (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ExpandInt64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ExpandInt64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ExpandInt64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ExpandUint8x16 x mask) => (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(ExpandUint8x32 x mask) => (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(ExpandUint8x64 x mask) => (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(ExpandUint16x8 x mask) => (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ExpandUint16x16 x mask) => (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ExpandUint16x32 x mask) => (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ExpandUint32x4 x mask) => (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ExpandUint32x8 x mask) => (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ExpandUint32x16 x mask) => (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(FloorFloat32x4 x) => (VROUNDPS128 [1] x)
(FloorFloat32x8 x) => (VROUNDPS256 [1] x)
(FloorFloat64x2 x) => (VROUNDPD128 [1] x)
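Editor's note: each Expand rule above lowers the generic op to its masked AVX-512 instruction by first converting the vector-shaped mask to a K register. A rough sketch of the resulting instruction pair for Float32x4.Expand (assumed operand syntax; actual register choices are up to the assembler and register allocator):

	VPMOVD2M  X1, K1       // VPMOVVec32x4ToM: vector mask -> K mask register
	VEXPANDPS X0, K1, X2   // VEXPANDPSMasked128: expand the packed low elements of X0 under K1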
{name: "VDIVPSMasked128", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VDIVPSMasked256", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VDIVPSMasked512", argLength: 3, reg: w2kw, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VEXPANDPDMasked128", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VEXPANDPDMasked256", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VEXPANDPDMasked512", argLength: 2, reg: wkw, asm: "VEXPANDPD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VEXPANDPSMasked128", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VEXPANDPSMasked256", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VEXPANDPSMasked512", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VFMADD213PD128", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VFMADD213PD256", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VFMADD213PD512", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMWMasked128", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPERMWMasked256", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMWMasked512", argLength: 3, reg: w2kw, asm: "VPERMW", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPEXPANDBMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPEXPANDBMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPEXPANDBMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDB", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPEXPANDDMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPEXPANDDMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPEXPANDDMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPEXPANDQMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPEXPANDQMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPEXPANDQMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDQ", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPEXPANDWMasked128", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPEXPANDWMasked256", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPEXPANDWMasked512", argLength: 2, reg: wkw, asm: "VPEXPANDW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPHADDD128", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPHADDD256", argLength: 2, reg: v21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPHADDSW128", argLength: 2, reg: v21, asm: "VPHADDSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "EqualUint64x2", argLength: 2, commutative: true},
{name: "EqualUint64x4", argLength: 2, commutative: true},
{name: "EqualUint64x8", argLength: 2, commutative: true},
+ {name: "ExpandFloat32x4", argLength: 2, commutative: false},
+ {name: "ExpandFloat32x8", argLength: 2, commutative: false},
+ {name: "ExpandFloat32x16", argLength: 2, commutative: false},
+ {name: "ExpandFloat64x2", argLength: 2, commutative: false},
+ {name: "ExpandFloat64x4", argLength: 2, commutative: false},
+ {name: "ExpandFloat64x8", argLength: 2, commutative: false},
+ {name: "ExpandInt8x16", argLength: 2, commutative: false},
+ {name: "ExpandInt8x32", argLength: 2, commutative: false},
+ {name: "ExpandInt8x64", argLength: 2, commutative: false},
+ {name: "ExpandInt16x8", argLength: 2, commutative: false},
+ {name: "ExpandInt16x16", argLength: 2, commutative: false},
+ {name: "ExpandInt16x32", argLength: 2, commutative: false},
+ {name: "ExpandInt32x4", argLength: 2, commutative: false},
+ {name: "ExpandInt32x8", argLength: 2, commutative: false},
+ {name: "ExpandInt32x16", argLength: 2, commutative: false},
+ {name: "ExpandInt64x2", argLength: 2, commutative: false},
+ {name: "ExpandInt64x4", argLength: 2, commutative: false},
+ {name: "ExpandInt64x8", argLength: 2, commutative: false},
+ {name: "ExpandUint8x16", argLength: 2, commutative: false},
+ {name: "ExpandUint8x32", argLength: 2, commutative: false},
+ {name: "ExpandUint8x64", argLength: 2, commutative: false},
+ {name: "ExpandUint16x8", argLength: 2, commutative: false},
+ {name: "ExpandUint16x16", argLength: 2, commutative: false},
+ {name: "ExpandUint16x32", argLength: 2, commutative: false},
+ {name: "ExpandUint32x4", argLength: 2, commutative: false},
+ {name: "ExpandUint32x8", argLength: 2, commutative: false},
+ {name: "ExpandUint32x16", argLength: 2, commutative: false},
+ {name: "ExpandUint64x2", argLength: 2, commutative: false},
+ {name: "ExpandUint64x4", argLength: 2, commutative: false},
+ {name: "ExpandUint64x8", argLength: 2, commutative: false},
{name: "FloorFloat32x4", argLength: 1, commutative: false},
{name: "FloorFloat32x8", argLength: 1, commutative: false},
{name: "FloorFloat64x2", argLength: 1, commutative: false},
OpAMD64VDIVPSMasked128
OpAMD64VDIVPSMasked256
OpAMD64VDIVPSMasked512
+ OpAMD64VEXPANDPDMasked128
+ OpAMD64VEXPANDPDMasked256
+ OpAMD64VEXPANDPDMasked512
+ OpAMD64VEXPANDPSMasked128
+ OpAMD64VEXPANDPSMasked256
+ OpAMD64VEXPANDPSMasked512
OpAMD64VFMADD213PD128
OpAMD64VFMADD213PD256
OpAMD64VFMADD213PD512
OpAMD64VPERMWMasked128
OpAMD64VPERMWMasked256
OpAMD64VPERMWMasked512
+ OpAMD64VPEXPANDBMasked128
+ OpAMD64VPEXPANDBMasked256
+ OpAMD64VPEXPANDBMasked512
+ OpAMD64VPEXPANDDMasked128
+ OpAMD64VPEXPANDDMasked256
+ OpAMD64VPEXPANDDMasked512
+ OpAMD64VPEXPANDQMasked128
+ OpAMD64VPEXPANDQMasked256
+ OpAMD64VPEXPANDQMasked512
+ OpAMD64VPEXPANDWMasked128
+ OpAMD64VPEXPANDWMasked256
+ OpAMD64VPEXPANDWMasked512
OpAMD64VPHADDD128
OpAMD64VPHADDD256
OpAMD64VPHADDSW128
OpEqualUint64x2
OpEqualUint64x4
OpEqualUint64x8
+ OpExpandFloat32x4
+ OpExpandFloat32x8
+ OpExpandFloat32x16
+ OpExpandFloat64x2
+ OpExpandFloat64x4
+ OpExpandFloat64x8
+ OpExpandInt8x16
+ OpExpandInt8x32
+ OpExpandInt8x64
+ OpExpandInt16x8
+ OpExpandInt16x16
+ OpExpandInt16x32
+ OpExpandInt32x4
+ OpExpandInt32x8
+ OpExpandInt32x16
+ OpExpandInt64x2
+ OpExpandInt64x4
+ OpExpandInt64x8
+ OpExpandUint8x16
+ OpExpandUint8x32
+ OpExpandUint8x64
+ OpExpandUint16x8
+ OpExpandUint16x16
+ OpExpandUint16x32
+ OpExpandUint32x4
+ OpExpandUint32x8
+ OpExpandUint32x16
+ OpExpandUint64x2
+ OpExpandUint64x4
+ OpExpandUint64x8
OpFloorFloat32x4
OpFloorFloat32x8
OpFloorFloat64x2
},
},
},
+ {
+ name: "VEXPANDPDMasked128",
+ argLen: 2,
+ asm: x86.AVEXPANDPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VEXPANDPDMasked256",
+ argLen: 2,
+ asm: x86.AVEXPANDPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VEXPANDPDMasked512",
+ argLen: 2,
+ asm: x86.AVEXPANDPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VEXPANDPSMasked128",
+ argLen: 2,
+ asm: x86.AVEXPANDPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VEXPANDPSMasked256",
+ argLen: 2,
+ asm: x86.AVEXPANDPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VEXPANDPSMasked512",
+ argLen: 2,
+ asm: x86.AVEXPANDPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
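Editor's note: the regInfo bitmasks in these entries encode register sets, one bit per register in the compiler's AMD64 numbering. Decoding the two constants that recur here agrees with their comments:

	2147418112        = 0x7fff0000        -> bits 16..30 set, the fifteen registers X0 through X14
	71494644084506624 = 0xfe000000000000  -> bits 49..55 set, the seven mask registers K1 through K7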
{
name: "VFMADD213PD128",
argLen: 3,
},
},
},
+ {
+ name: "VPEXPANDBMasked128",
+ argLen: 2,
+ asm: x86.AVPEXPANDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDBMasked256",
+ argLen: 2,
+ asm: x86.AVPEXPANDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDBMasked512",
+ argLen: 2,
+ asm: x86.AVPEXPANDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDDMasked128",
+ argLen: 2,
+ asm: x86.AVPEXPANDD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDDMasked256",
+ argLen: 2,
+ asm: x86.AVPEXPANDD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDDMasked512",
+ argLen: 2,
+ asm: x86.AVPEXPANDD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDQMasked128",
+ argLen: 2,
+ asm: x86.AVPEXPANDQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDQMasked256",
+ argLen: 2,
+ asm: x86.AVPEXPANDQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDQMasked512",
+ argLen: 2,
+ asm: x86.AVPEXPANDQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDWMasked128",
+ argLen: 2,
+ asm: x86.AVPEXPANDW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDWMasked256",
+ argLen: 2,
+ asm: x86.AVPEXPANDW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPEXPANDWMasked512",
+ argLen: 2,
+ asm: x86.AVPEXPANDW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPHADDD128",
argLen: 2,
commutative: true,
generic: true,
},
+ {
+ name: "ExpandFloat32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandFloat32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandFloat32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandFloat64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandFloat64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandFloat64x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandInt64x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "ExpandUint64x8",
+ argLen: 2,
+ generic: true,
+ },
{
name: "FloorFloat32x4",
argLen: 1,
return true
case OpEqualUint8x64:
return rewriteValueAMD64_OpEqualUint8x64(v)
+ case OpExpandFloat32x16:
+ return rewriteValueAMD64_OpExpandFloat32x16(v)
+ case OpExpandFloat32x4:
+ return rewriteValueAMD64_OpExpandFloat32x4(v)
+ case OpExpandFloat32x8:
+ return rewriteValueAMD64_OpExpandFloat32x8(v)
+ case OpExpandFloat64x2:
+ return rewriteValueAMD64_OpExpandFloat64x2(v)
+ case OpExpandFloat64x4:
+ return rewriteValueAMD64_OpExpandFloat64x4(v)
+ case OpExpandFloat64x8:
+ return rewriteValueAMD64_OpExpandFloat64x8(v)
+ case OpExpandInt16x16:
+ return rewriteValueAMD64_OpExpandInt16x16(v)
+ case OpExpandInt16x32:
+ return rewriteValueAMD64_OpExpandInt16x32(v)
+ case OpExpandInt16x8:
+ return rewriteValueAMD64_OpExpandInt16x8(v)
+ case OpExpandInt32x16:
+ return rewriteValueAMD64_OpExpandInt32x16(v)
+ case OpExpandInt32x4:
+ return rewriteValueAMD64_OpExpandInt32x4(v)
+ case OpExpandInt32x8:
+ return rewriteValueAMD64_OpExpandInt32x8(v)
+ case OpExpandInt64x2:
+ return rewriteValueAMD64_OpExpandInt64x2(v)
+ case OpExpandInt64x4:
+ return rewriteValueAMD64_OpExpandInt64x4(v)
+ case OpExpandInt64x8:
+ return rewriteValueAMD64_OpExpandInt64x8(v)
+ case OpExpandInt8x16:
+ return rewriteValueAMD64_OpExpandInt8x16(v)
+ case OpExpandInt8x32:
+ return rewriteValueAMD64_OpExpandInt8x32(v)
+ case OpExpandInt8x64:
+ return rewriteValueAMD64_OpExpandInt8x64(v)
+ case OpExpandUint16x16:
+ return rewriteValueAMD64_OpExpandUint16x16(v)
+ case OpExpandUint16x32:
+ return rewriteValueAMD64_OpExpandUint16x32(v)
+ case OpExpandUint16x8:
+ return rewriteValueAMD64_OpExpandUint16x8(v)
+ case OpExpandUint32x16:
+ return rewriteValueAMD64_OpExpandUint32x16(v)
+ case OpExpandUint32x4:
+ return rewriteValueAMD64_OpExpandUint32x4(v)
+ case OpExpandUint32x8:
+ return rewriteValueAMD64_OpExpandUint32x8(v)
+ case OpExpandUint64x2:
+ return rewriteValueAMD64_OpExpandUint64x2(v)
+ case OpExpandUint64x4:
+ return rewriteValueAMD64_OpExpandUint64x4(v)
+ case OpExpandUint64x8:
+ return rewriteValueAMD64_OpExpandUint64x8(v)
+ case OpExpandUint8x16:
+ return rewriteValueAMD64_OpExpandUint8x16(v)
+ case OpExpandUint8x32:
+ return rewriteValueAMD64_OpExpandUint8x32(v)
+ case OpExpandUint8x64:
+ return rewriteValueAMD64_OpExpandUint8x64(v)
case OpFMA:
return rewriteValueAMD64_OpFMA(v)
case OpFloor:
return true
}
}
+func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandFloat32x16 x mask)
+ // result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VEXPANDPSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandFloat32x4 x mask)
+ // result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VEXPANDPSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandFloat32x8 x mask)
+ // result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VEXPANDPSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandFloat64x2 x mask)
+ // result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VEXPANDPDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandFloat64x4 x mask)
+ // result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VEXPANDPDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandFloat64x8 x mask)
+ // result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VEXPANDPDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt16x16 x mask)
+ // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt16x32 x mask)
+ // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt16x8 x mask)
+ // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt32x16 x mask)
+ // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt32x4 x mask)
+ // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt32x8 x mask)
+ // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt64x2 x mask)
+ // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt64x4 x mask)
+ // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt64x8 x mask)
+ // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt8x16 x mask)
+ // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt8x32 x mask)
+ // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandInt8x64 x mask)
+ // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint16x16 x mask)
+ // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint16x32 x mask)
+ // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint16x8 x mask)
+ // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint32x16 x mask)
+ // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint32x4 x mask)
+ // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint32x8 x mask)
+ // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint64x2 x mask)
+ // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint64x4 x mask)
+ // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint64x8 x mask)
+ // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint8x16 x mask)
+ // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint8x32 x mask)
+ // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (ExpandUint8x64 x mask)
+ // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPEXPANDBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
func rewriteValueAMD64_OpFMA(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Expand", opLen2(ssa.OpExpandFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.Expand", opLen2(ssa.OpExpandFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.Expand", opLen2(ssa.OpExpandFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Expand", opLen2(ssa.OpExpandFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.Expand", opLen2(ssa.OpExpandFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.Expand", opLen2(ssa.OpExpandFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Expand", opLen2(ssa.OpExpandInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.Expand", opLen2(ssa.OpExpandInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.Expand", opLen2(ssa.OpExpandInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Expand", opLen2(ssa.OpExpandInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.Expand", opLen2(ssa.OpExpandInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Expand", opLen2(ssa.OpExpandInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.Expand", opLen2(ssa.OpExpandInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.Expand", opLen2(ssa.OpExpandInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.Expand", opLen2(ssa.OpExpandInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.Expand", opLen2(ssa.OpExpandInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.Expand", opLen2(ssa.OpExpandInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.Expand", opLen2(ssa.OpExpandInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.Expand", opLen2(ssa.OpExpandUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.Expand", opLen2(ssa.OpExpandUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.Expand", opLen2(ssa.OpExpandUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Expand", opLen2(ssa.OpExpandUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.Expand", opLen2(ssa.OpExpandUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.Expand", opLen2(ssa.OpExpandUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Expand", opLen2(ssa.OpExpandUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.Expand", opLen2(ssa.OpExpandUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.Expand", opLen2(ssa.OpExpandUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)
// Asm: VPCMPUQ, CPU Feature: AVX512F
func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
+/* Expand */
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VEXPANDPS, CPU Feature: AVX512F
+func (x Float32x4) Expand(mask Mask32x4) Float32x4
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VEXPANDPS, CPU Feature: AVX512F
+func (x Float32x8) Expand(mask Mask32x8) Float32x8
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VEXPANDPS, CPU Feature: AVX512F
+func (x Float32x16) Expand(mask Mask32x16) Float32x16
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VEXPANDPD, CPU Feature: AVX512F
+func (x Float64x2) Expand(mask Mask64x2) Float64x2
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VEXPANDPD, CPU Feature: AVX512F
+func (x Float64x4) Expand(mask Mask64x4) Float64x4
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VEXPANDPD, CPU Feature: AVX512F
+func (x Float64x8) Expand(mask Mask64x8) Float64x8
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Int8x16) Expand(mask Mask8x16) Int8x16
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Int8x32) Expand(mask Mask8x32) Int8x32
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Int8x64) Expand(mask Mask8x64) Int8x64
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Int16x8) Expand(mask Mask16x8) Int16x8
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Int16x16) Expand(mask Mask16x16) Int16x16
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Int16x32) Expand(mask Mask16x32) Int16x32
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Int32x4) Expand(mask Mask32x4) Int32x4
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Int32x8) Expand(mask Mask32x8) Int32x8
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Int32x16) Expand(mask Mask32x16) Int32x16
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Int64x2) Expand(mask Mask64x2) Int64x2
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Int64x4) Expand(mask Mask64x4) Int64x4
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Int64x8) Expand(mask Mask64x8) Int64x8
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Uint8x16) Expand(mask Mask8x16) Uint8x16
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Uint8x32) Expand(mask Mask8x32) Uint8x32
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
+func (x Uint8x64) Expand(mask Mask8x64) Uint8x64
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Uint16x8) Expand(mask Mask16x8) Uint16x8
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Uint16x16) Expand(mask Mask16x16) Uint16x16
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
+func (x Uint16x32) Expand(mask Mask16x32) Uint16x32
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Uint32x4) Expand(mask Mask32x4) Uint32x4
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Uint32x8) Expand(mask Mask32x8) Uint32x8
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDD, CPU Feature: AVX512F
+func (x Uint32x16) Expand(mask Mask32x16) Uint32x16
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Uint64x2) Expand(mask Mask64x2) Uint64x2
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
+
+// Expand distributes the elements packed into the low part of x into the
+// positions selected by mask, in order from the lowest selected position upward.
+//
+// Asm: VPEXPANDQ, CPU Feature: AVX512F
+func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
+
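Editor's note, a usage sketch of the semantics documented above (it mirrors the TestExpand case added later in this CL): the elements packed at the low end of the source fill the mask-selected positions in order.

	src := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})   // data packed at the low end
	sel := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1}) // select result positions 1 and 3
	dst := src.Expand(sel.AsMask32x4())
	out := make([]int32, 4)
	dst.StoreSlice(out) // out is now []int32{0, 3, 0, 4}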
/* Floor */
// Floor rounds elements down to the nearest integer.
}
}
+func TestExpand(t *testing.T) {
+ if !simd.HasAVX512() {
+ t.Skip("Test requires HasAVX512, not available on this hardware")
+ return
+ }
+ v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
+ v0101 := simd.LoadInt32x4Slice([]int32{0, -1, 0, -1})
+ v0304 := v3400.Expand(v0101.AsMask32x4())
+ got := make([]int32, 4)
+ v0304.StoreSlice(got)
+ want := []int32{0, 3, 0, 4}
+ if !slices.Equal(got, want) {
+ t.Errorf("want and got differ, want=%v, got=%v", want, got)
+ }
+}
+
func TestPairDotProdAccumulate(t *testing.T) {
if !simd.HasAVX512GFNI() {
// TODO: this function is actually VNNI, let's implement and call the right check.