ssa.OpAMD64VPADDQ128,
ssa.OpAMD64VPADDQ256,
ssa.OpAMD64VPADDQ512,
+ ssa.OpAMD64VHADDPS128,
+ ssa.OpAMD64VHADDPS256,
+ ssa.OpAMD64VHADDPD128,
+ ssa.OpAMD64VHADDPD256,
+ ssa.OpAMD64VPHADDW128,
+ ssa.OpAMD64VPHADDW256,
+ ssa.OpAMD64VPHADDD128,
+ ssa.OpAMD64VPHADDD256,
+ ssa.OpAMD64VPHADDSW128,
+ ssa.OpAMD64VPHADDSW256,
+ ssa.OpAMD64VPADDSB128,
+ ssa.OpAMD64VPADDSB256,
+ ssa.OpAMD64VPADDSB512,
+ ssa.OpAMD64VPADDSW128,
+ ssa.OpAMD64VPADDSW256,
+ ssa.OpAMD64VPADDSW512,
ssa.OpAMD64VADDSUBPS128,
ssa.OpAMD64VADDSUBPS256,
ssa.OpAMD64VADDSUBPD128,
ssa.OpAMD64VMULPD128,
ssa.OpAMD64VMULPD256,
ssa.OpAMD64VMULPD512,
- ssa.OpAMD64VSCALEFPS128,
- ssa.OpAMD64VSCALEFPS256,
- ssa.OpAMD64VSCALEFPS512,
- ssa.OpAMD64VSCALEFPD128,
- ssa.OpAMD64VSCALEFPD256,
- ssa.OpAMD64VSCALEFPD512,
+ ssa.OpAMD64VPMULLW128,
+ ssa.OpAMD64VPMULLW256,
+ ssa.OpAMD64VPMULLW512,
+ ssa.OpAMD64VPMULLD128,
+ ssa.OpAMD64VPMULLD256,
+ ssa.OpAMD64VPMULLD512,
+ ssa.OpAMD64VPMULLQ128,
+ ssa.OpAMD64VPMULLQ256,
+ ssa.OpAMD64VPMULLQ512,
ssa.OpAMD64VPMULDQ128,
ssa.OpAMD64VPMULDQ256,
ssa.OpAMD64VPMULDQ512,
ssa.OpAMD64VPMULHUW128,
ssa.OpAMD64VPMULHUW256,
ssa.OpAMD64VPMULHUW512,
- ssa.OpAMD64VPMULLW128,
- ssa.OpAMD64VPMULLW256,
- ssa.OpAMD64VPMULLW512,
- ssa.OpAMD64VPMULLD128,
- ssa.OpAMD64VPMULLD256,
- ssa.OpAMD64VPMULLD512,
- ssa.OpAMD64VPMULLQ128,
- ssa.OpAMD64VPMULLQ256,
- ssa.OpAMD64VPMULLQ512,
ssa.OpAMD64VPOR128,
ssa.OpAMD64VPOR256,
ssa.OpAMD64VPORD512,
ssa.OpAMD64VPMADDWD128,
ssa.OpAMD64VPMADDWD256,
ssa.OpAMD64VPMADDWD512,
- ssa.OpAMD64VHADDPS128,
- ssa.OpAMD64VHADDPS256,
- ssa.OpAMD64VHADDPD128,
- ssa.OpAMD64VHADDPD256,
- ssa.OpAMD64VPHADDW128,
- ssa.OpAMD64VPHADDW256,
- ssa.OpAMD64VPHADDD128,
- ssa.OpAMD64VPHADDD256,
- ssa.OpAMD64VHSUBPS128,
- ssa.OpAMD64VHSUBPS256,
- ssa.OpAMD64VHSUBPD128,
- ssa.OpAMD64VHSUBPD256,
- ssa.OpAMD64VPHSUBW128,
- ssa.OpAMD64VPHSUBW256,
- ssa.OpAMD64VPHSUBD128,
- ssa.OpAMD64VPHSUBD256,
ssa.OpAMD64VPERMB128,
ssa.OpAMD64VPERMB256,
ssa.OpAMD64VPERMB512,
ssa.OpAMD64VPRORVQ128,
ssa.OpAMD64VPRORVQ256,
ssa.OpAMD64VPRORVQ512,
- ssa.OpAMD64VPADDSB128,
- ssa.OpAMD64VPADDSB256,
- ssa.OpAMD64VPADDSB512,
- ssa.OpAMD64VPADDSW128,
- ssa.OpAMD64VPADDSW256,
- ssa.OpAMD64VPADDSW512,
- ssa.OpAMD64VPHADDSW128,
- ssa.OpAMD64VPHADDSW256,
- ssa.OpAMD64VPHSUBSW128,
- ssa.OpAMD64VPHSUBSW256,
- ssa.OpAMD64VPSUBSB128,
- ssa.OpAMD64VPSUBSB256,
- ssa.OpAMD64VPSUBSB512,
- ssa.OpAMD64VPSUBSW128,
- ssa.OpAMD64VPSUBSW256,
- ssa.OpAMD64VPSUBSW512,
ssa.OpAMD64VPMADDUBSW128,
ssa.OpAMD64VPMADDUBSW256,
ssa.OpAMD64VPMADDUBSW512,
+ ssa.OpAMD64VSCALEFPS128,
+ ssa.OpAMD64VSCALEFPS256,
+ ssa.OpAMD64VSCALEFPS512,
+ ssa.OpAMD64VSCALEFPD128,
+ ssa.OpAMD64VSCALEFPD256,
+ ssa.OpAMD64VSCALEFPD512,
ssa.OpAMD64VPSLLVW128,
ssa.OpAMD64VPSLLVW256,
ssa.OpAMD64VPSLLVW512,
ssa.OpAMD64VPSUBQ128,
ssa.OpAMD64VPSUBQ256,
ssa.OpAMD64VPSUBQ512,
+ ssa.OpAMD64VHSUBPS128,
+ ssa.OpAMD64VHSUBPS256,
+ ssa.OpAMD64VHSUBPD128,
+ ssa.OpAMD64VHSUBPD256,
+ ssa.OpAMD64VPHSUBW128,
+ ssa.OpAMD64VPHSUBW256,
+ ssa.OpAMD64VPHSUBD128,
+ ssa.OpAMD64VPHSUBD256,
+ ssa.OpAMD64VPHSUBSW128,
+ ssa.OpAMD64VPHSUBSW256,
+ ssa.OpAMD64VPSUBSB128,
+ ssa.OpAMD64VPSUBSB256,
+ ssa.OpAMD64VPSUBSB512,
+ ssa.OpAMD64VPSUBSW128,
+ ssa.OpAMD64VPSUBSW256,
+ ssa.OpAMD64VPSUBSW512,
ssa.OpAMD64VPXOR128,
ssa.OpAMD64VPXOR256,
ssa.OpAMD64VPXORD512,
ssa.OpAMD64VPADDQMasked128,
ssa.OpAMD64VPADDQMasked256,
ssa.OpAMD64VPADDQMasked512,
+ ssa.OpAMD64VPADDSBMasked128,
+ ssa.OpAMD64VPADDSBMasked256,
+ ssa.OpAMD64VPADDSBMasked512,
+ ssa.OpAMD64VPADDSWMasked128,
+ ssa.OpAMD64VPADDSWMasked256,
+ ssa.OpAMD64VPADDSWMasked512,
ssa.OpAMD64VPANDDMasked128,
ssa.OpAMD64VPANDDMasked256,
ssa.OpAMD64VPANDDMasked512,
ssa.OpAMD64VPMINUQMasked128,
ssa.OpAMD64VPMINUQMasked256,
ssa.OpAMD64VPMINUQMasked512,
- ssa.OpAMD64VSCALEFPSMasked128,
- ssa.OpAMD64VSCALEFPSMasked256,
- ssa.OpAMD64VSCALEFPSMasked512,
- ssa.OpAMD64VSCALEFPDMasked128,
- ssa.OpAMD64VSCALEFPDMasked256,
- ssa.OpAMD64VSCALEFPDMasked512,
ssa.OpAMD64VPMULDQMasked128,
ssa.OpAMD64VPMULDQMasked256,
ssa.OpAMD64VPMULDQMasked512,
ssa.OpAMD64VPMULHUWMasked128,
ssa.OpAMD64VPMULHUWMasked256,
ssa.OpAMD64VPMULHUWMasked512,
+ ssa.OpAMD64VMULPSMasked128,
+ ssa.OpAMD64VMULPSMasked256,
+ ssa.OpAMD64VMULPSMasked512,
+ ssa.OpAMD64VMULPDMasked128,
+ ssa.OpAMD64VMULPDMasked256,
+ ssa.OpAMD64VMULPDMasked512,
ssa.OpAMD64VPMULLWMasked128,
ssa.OpAMD64VPMULLWMasked256,
ssa.OpAMD64VPMULLWMasked512,
ssa.OpAMD64VPMULLQMasked128,
ssa.OpAMD64VPMULLQMasked256,
ssa.OpAMD64VPMULLQMasked512,
- ssa.OpAMD64VMULPSMasked128,
- ssa.OpAMD64VMULPSMasked256,
- ssa.OpAMD64VMULPSMasked512,
- ssa.OpAMD64VMULPDMasked128,
- ssa.OpAMD64VMULPDMasked256,
- ssa.OpAMD64VMULPDMasked512,
ssa.OpAMD64VPORDMasked128,
ssa.OpAMD64VPORDMasked256,
ssa.OpAMD64VPORDMasked512,
ssa.OpAMD64VPRORVQMasked128,
ssa.OpAMD64VPRORVQMasked256,
ssa.OpAMD64VPRORVQMasked512,
- ssa.OpAMD64VPADDSBMasked128,
- ssa.OpAMD64VPADDSBMasked256,
- ssa.OpAMD64VPADDSBMasked512,
- ssa.OpAMD64VPADDSWMasked128,
- ssa.OpAMD64VPADDSWMasked256,
- ssa.OpAMD64VPADDSWMasked512,
- ssa.OpAMD64VPSUBSBMasked128,
- ssa.OpAMD64VPSUBSBMasked256,
- ssa.OpAMD64VPSUBSBMasked512,
- ssa.OpAMD64VPSUBSWMasked128,
- ssa.OpAMD64VPSUBSWMasked256,
- ssa.OpAMD64VPSUBSWMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
+ ssa.OpAMD64VSCALEFPSMasked128,
+ ssa.OpAMD64VSCALEFPSMasked256,
+ ssa.OpAMD64VSCALEFPSMasked512,
+ ssa.OpAMD64VSCALEFPDMasked128,
+ ssa.OpAMD64VSCALEFPDMasked256,
+ ssa.OpAMD64VSCALEFPDMasked512,
ssa.OpAMD64VPSLLVWMasked128,
ssa.OpAMD64VPSLLVWMasked256,
ssa.OpAMD64VPSLLVWMasked512,
ssa.OpAMD64VPSUBQMasked128,
ssa.OpAMD64VPSUBQMasked256,
ssa.OpAMD64VPSUBQMasked512,
+ ssa.OpAMD64VPSUBSBMasked128,
+ ssa.OpAMD64VPSUBSBMasked256,
+ ssa.OpAMD64VPSUBSBMasked512,
+ ssa.OpAMD64VPSUBSWMasked128,
+ ssa.OpAMD64VPSUBSWMasked256,
+ ssa.OpAMD64VPSUBSWMasked512,
ssa.OpAMD64VPXORDMasked128,
ssa.OpAMD64VPXORDMasked256,
ssa.OpAMD64VPXORDMasked512,
ssa.OpAMD64VPADDQMasked128,
ssa.OpAMD64VPADDQMasked256,
ssa.OpAMD64VPADDQMasked512,
+ ssa.OpAMD64VPADDSBMasked128,
+ ssa.OpAMD64VPADDSBMasked256,
+ ssa.OpAMD64VPADDSBMasked512,
+ ssa.OpAMD64VPADDSWMasked128,
+ ssa.OpAMD64VPADDSWMasked256,
+ ssa.OpAMD64VPADDSWMasked512,
ssa.OpAMD64VPANDDMasked128,
ssa.OpAMD64VPANDDMasked256,
ssa.OpAMD64VPANDDMasked512,
ssa.OpAMD64VRNDSCALEPDMasked128,
ssa.OpAMD64VRNDSCALEPDMasked256,
ssa.OpAMD64VRNDSCALEPDMasked512,
+ ssa.OpAMD64VREDUCEPSMasked128,
+ ssa.OpAMD64VREDUCEPSMasked256,
+ ssa.OpAMD64VREDUCEPSMasked512,
+ ssa.OpAMD64VREDUCEPDMasked128,
+ ssa.OpAMD64VREDUCEPDMasked256,
+ ssa.OpAMD64VREDUCEPDMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
ssa.OpAMD64VCVTPS2UDQMasked128,
ssa.OpAMD64VCVTPS2UDQMasked256,
ssa.OpAMD64VCVTPS2UDQMasked512,
- ssa.OpAMD64VREDUCEPSMasked128,
- ssa.OpAMD64VREDUCEPSMasked256,
- ssa.OpAMD64VREDUCEPSMasked512,
- ssa.OpAMD64VREDUCEPDMasked128,
- ssa.OpAMD64VREDUCEPDMasked256,
- ssa.OpAMD64VREDUCEPDMasked512,
ssa.OpAMD64VDIVPSMasked128,
ssa.OpAMD64VDIVPSMasked256,
ssa.OpAMD64VDIVPSMasked512,
ssa.OpAMD64VPMINUQMasked128,
ssa.OpAMD64VPMINUQMasked256,
ssa.OpAMD64VPMINUQMasked512,
- ssa.OpAMD64VSCALEFPSMasked128,
- ssa.OpAMD64VSCALEFPSMasked256,
- ssa.OpAMD64VSCALEFPSMasked512,
- ssa.OpAMD64VSCALEFPDMasked128,
- ssa.OpAMD64VSCALEFPDMasked256,
- ssa.OpAMD64VSCALEFPDMasked512,
ssa.OpAMD64VPMULDQMasked128,
ssa.OpAMD64VPMULDQMasked256,
ssa.OpAMD64VPMULDQMasked512,
ssa.OpAMD64VPMULHUWMasked128,
ssa.OpAMD64VPMULHUWMasked256,
ssa.OpAMD64VPMULHUWMasked512,
+ ssa.OpAMD64VMULPSMasked128,
+ ssa.OpAMD64VMULPSMasked256,
+ ssa.OpAMD64VMULPSMasked512,
+ ssa.OpAMD64VMULPDMasked128,
+ ssa.OpAMD64VMULPDMasked256,
+ ssa.OpAMD64VMULPDMasked512,
ssa.OpAMD64VPMULLWMasked128,
ssa.OpAMD64VPMULLWMasked256,
ssa.OpAMD64VPMULLWMasked512,
ssa.OpAMD64VPMULLQMasked128,
ssa.OpAMD64VPMULLQMasked256,
ssa.OpAMD64VPMULLQMasked512,
- ssa.OpAMD64VMULPSMasked128,
- ssa.OpAMD64VMULPSMasked256,
- ssa.OpAMD64VMULPSMasked512,
- ssa.OpAMD64VMULPDMasked128,
- ssa.OpAMD64VMULPDMasked256,
- ssa.OpAMD64VMULPDMasked512,
ssa.OpAMD64VPORDMasked128,
ssa.OpAMD64VPORDMasked256,
ssa.OpAMD64VPORDMasked512,
ssa.OpAMD64VPDPWSSDSMasked128,
ssa.OpAMD64VPDPWSSDSMasked256,
ssa.OpAMD64VPDPWSSDSMasked512,
- ssa.OpAMD64VPADDSBMasked128,
- ssa.OpAMD64VPADDSBMasked256,
- ssa.OpAMD64VPADDSBMasked512,
- ssa.OpAMD64VPADDSWMasked128,
- ssa.OpAMD64VPADDSWMasked256,
- ssa.OpAMD64VPADDSWMasked512,
- ssa.OpAMD64VPSUBSBMasked128,
- ssa.OpAMD64VPSUBSBMasked256,
- ssa.OpAMD64VPSUBSBMasked512,
- ssa.OpAMD64VPSUBSWMasked128,
- ssa.OpAMD64VPSUBSWMasked256,
- ssa.OpAMD64VPSUBSWMasked512,
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VPDPBUSDSMasked128,
ssa.OpAMD64VPDPBUSDSMasked256,
ssa.OpAMD64VPDPBUSDSMasked512,
+ ssa.OpAMD64VSCALEFPSMasked128,
+ ssa.OpAMD64VSCALEFPSMasked256,
+ ssa.OpAMD64VSCALEFPSMasked512,
+ ssa.OpAMD64VSCALEFPDMasked128,
+ ssa.OpAMD64VSCALEFPDMasked256,
+ ssa.OpAMD64VSCALEFPDMasked512,
ssa.OpAMD64VPSHLDWMasked128,
ssa.OpAMD64VPSHLDWMasked256,
ssa.OpAMD64VPSHLDWMasked512,
ssa.OpAMD64VPSUBQMasked128,
ssa.OpAMD64VPSUBQMasked256,
ssa.OpAMD64VPSUBQMasked512,
+ ssa.OpAMD64VPSUBSBMasked128,
+ ssa.OpAMD64VPSUBSBMasked256,
+ ssa.OpAMD64VPSUBSBMasked512,
+ ssa.OpAMD64VPSUBSWMasked128,
+ ssa.OpAMD64VPSUBSWMasked256,
+ ssa.OpAMD64VPSUBSWMasked512,
ssa.OpAMD64VPDPBUSDMasked128,
ssa.OpAMD64VPDPBUSDMasked256,
ssa.OpAMD64VPDPBUSDMasked512,
(AddMaskedUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(AddMaskedUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(AddMaskedUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(AddPairsFloat32x4 ...) => (VHADDPS128 ...)
+(AddPairsFloat32x8 ...) => (VHADDPS256 ...)
+(AddPairsFloat64x2 ...) => (VHADDPD128 ...)
+(AddPairsFloat64x4 ...) => (VHADDPD256 ...)
+(AddPairsInt16x8 ...) => (VPHADDW128 ...)
+(AddPairsInt16x16 ...) => (VPHADDW256 ...)
+(AddPairsInt32x4 ...) => (VPHADDD128 ...)
+(AddPairsInt32x8 ...) => (VPHADDD256 ...)
+(AddPairsUint16x8 ...) => (VPHADDW128 ...)
+(AddPairsUint16x16 ...) => (VPHADDW256 ...)
+(AddPairsUint32x4 ...) => (VPHADDD128 ...)
+(AddPairsUint32x8 ...) => (VPHADDD256 ...)
+(AddPairsSaturatedInt16x8 ...) => (VPHADDSW128 ...)
+(AddPairsSaturatedInt16x16 ...) => (VPHADDSW256 ...)
+(AddSaturatedInt8x16 ...) => (VPADDSB128 ...)
+(AddSaturatedInt8x32 ...) => (VPADDSB256 ...)
+(AddSaturatedInt8x64 ...) => (VPADDSB512 ...)
+(AddSaturatedInt16x8 ...) => (VPADDSW128 ...)
+(AddSaturatedInt16x16 ...) => (VPADDSW256 ...)
+(AddSaturatedInt16x32 ...) => (VPADDSW512 ...)
+(AddSaturatedUint8x16 ...) => (VPADDSB128 ...)
+(AddSaturatedUint8x32 ...) => (VPADDSB256 ...)
+(AddSaturatedUint8x64 ...) => (VPADDSB512 ...)
+(AddSaturatedUint16x8 ...) => (VPADDSW128 ...)
+(AddSaturatedUint16x16 ...) => (VPADDSW256 ...)
+(AddSaturatedUint16x32 ...) => (VPADDSW512 ...)
+(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
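The AddPairs* rules above lower to the horizontal-add instructions: adjacent pairs from x fill the low half of each result and adjacent pairs from y fill the high half. A one-vector sketch for the Float32x4 case (illustrative only, not generated code; the 256-bit forms repeat this per 128-bit lane):

package main

import "fmt"

// addPairs4 mirrors the VHADDPS128 pairing used by AddPairsFloat32x4:
// adjacent pairs of x land in the low half of the result, adjacent
// pairs of y in the high half.
func addPairs4(x, y [4]float32) [4]float32 {
	return [4]float32{x[0] + x[1], x[2] + x[3], y[0] + y[1], y[2] + y[3]}
}

func main() {
	fmt.Println(addPairs4([4]float32{1, 2, 3, 4}, [4]float32{10, 20, 30, 40})) // [3 7 30 70]
}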
(AddSubFloat32x4 ...) => (VADDSUBPS128 ...)
(AddSubFloat32x8 ...) => (VADDSUBPS256 ...)
(AddSubFloat64x2 ...) => (VADDSUBPD128 ...)
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
(CeilFloat64x4 x) => (VROUNDPD256 [2] x)
-(CeilWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+2] x)
-(CeilWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+2] x)
-(CeilWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x)
-(CeilWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x)
-(CeilWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x)
-(CeilWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x)
-(CeilWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(CeilWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(CeilWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(CeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(CeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(CeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(CeilScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+2] x)
+(CeilScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+2] x)
+(CeilScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x)
+(CeilScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x)
+(CeilScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x)
+(CeilScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x)
+(CeilScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(CeilScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(CeilScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(CeilScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(CeilScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(CeilScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(CeilScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
+(CeilScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
+(CeilScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
+(CeilScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x)
+(CeilScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x)
+(CeilScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x)
+(CeilScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(CeilScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(CeilScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(CeilScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(CeilScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(CeilScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
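The CeilScaled*/FloorScaled*/RoundScaled*/TruncScaled* rules, and their *ScaledResidue counterparts that lower to VREDUCE and return the part rounded away, differ only in the +0..+3 offset added to the AuxInt: those two bits are the imm8 rounding control of VRNDSCALE/VREDUCE. A minimal sketch of the immediate layout, assuming the AuxInt a already carries the scale bits in the upper nibble:

package main

import "fmt"

// Rounding-control values the rules add to the AuxInt ([a+0] .. [a+3]).
const (
	rcNearest = 0 // RoundScaled*
	rcFloor   = 1 // FloorScaled*
	rcCeil    = 2 // CeilScaled*
	rcTrunc   = 3 // TruncScaled*
)

// rndscaleImm sketches the VRNDSCALE/VREDUCE imm8 layout: bits 7:4 hold
// the scale M (round to a multiple of 2^-M) and bits 1:0 the rounding
// control. The rules only add the rounding control, so a is assumed to
// carry the scale bits already.
func rndscaleImm(scale, rc uint8) uint8 { return scale<<4 | rc }

func main() {
	fmt.Printf("%#x\n", rndscaleImm(3, rcCeil)) // 0x32: ceil to multiples of 1/8
}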
(CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
-(DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
-(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
-(DiffWithCeilWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x)
-(DiffWithCeilWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x)
-(DiffWithCeilWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x)
-(DiffWithCeilWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(DiffWithCeilWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(DiffWithCeilWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(DiffWithCeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(DiffWithCeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(DiffWithCeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(DiffWithFloorWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x)
-(DiffWithFloorWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x)
-(DiffWithFloorWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x)
-(DiffWithFloorWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x)
-(DiffWithFloorWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x)
-(DiffWithFloorWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x)
-(DiffWithFloorWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(DiffWithFloorWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(DiffWithFloorWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(DiffWithFloorWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(DiffWithFloorWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(DiffWithFloorWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(DiffWithRoundWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
-(DiffWithRoundWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
-(DiffWithRoundWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
-(DiffWithRoundWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
-(DiffWithRoundWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
-(DiffWithRoundWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
-(DiffWithRoundWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(DiffWithRoundWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(DiffWithRoundWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(DiffWithRoundWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(DiffWithRoundWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(DiffWithRoundWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(DiffWithTruncWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
-(DiffWithTruncWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
-(DiffWithTruncWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
-(DiffWithTruncWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
-(DiffWithTruncWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
-(DiffWithTruncWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
-(DiffWithTruncWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(DiffWithTruncWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(DiffWithTruncWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(DiffWithTruncWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(DiffWithTruncWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(DiffWithTruncWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(DivFloat32x4 ...) => (VDIVPS128 ...)
(DivFloat32x8 ...) => (VDIVPS256 ...)
(DivFloat32x16 ...) => (VDIVPS512 ...)
(FloorFloat32x8 x) => (VROUNDPS256 [1] x)
(FloorFloat64x2 x) => (VROUNDPD128 [1] x)
(FloorFloat64x4 x) => (VROUNDPD256 [1] x)
-(FloorWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+1] x)
-(FloorWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+1] x)
-(FloorWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x)
-(FloorWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x)
-(FloorWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x)
-(FloorWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x)
-(FloorWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FloorWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FloorWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FloorWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FloorWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FloorWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(FloorScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+1] x)
+(FloorScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+1] x)
+(FloorScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x)
+(FloorScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x)
+(FloorScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x)
+(FloorScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x)
+(FloorScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(FloorScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(FloorScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(FloorScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(FloorScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(FloorScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(FloorScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x)
+(FloorScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x)
+(FloorScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x)
+(FloorScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x)
+(FloorScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x)
+(FloorScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x)
+(FloorScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(FloorScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(FloorScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(FusedMultiplyAddFloat32x4 ...) => (VFMADD213PS128 ...)
(FusedMultiplyAddFloat32x8 ...) => (VFMADD213PS256 ...)
(FusedMultiplyAddFloat32x16 ...) => (VFMADD213PS512 ...)
(MulFloat64x2 ...) => (VMULPD128 ...)
(MulFloat64x4 ...) => (VMULPD256 ...)
(MulFloat64x8 ...) => (VMULPD512 ...)
-(MulByPowOf2Float32x4 ...) => (VSCALEFPS128 ...)
-(MulByPowOf2Float32x8 ...) => (VSCALEFPS256 ...)
-(MulByPowOf2Float32x16 ...) => (VSCALEFPS512 ...)
-(MulByPowOf2Float64x2 ...) => (VSCALEFPD128 ...)
-(MulByPowOf2Float64x4 ...) => (VSCALEFPD256 ...)
-(MulByPowOf2Float64x8 ...) => (VSCALEFPD512 ...)
-(MulByPowOf2MaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulByPowOf2MaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulByPowOf2MaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulByPowOf2MaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulByPowOf2MaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulByPowOf2MaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MulInt16x8 ...) => (VPMULLW128 ...)
+(MulInt16x16 ...) => (VPMULLW256 ...)
+(MulInt16x32 ...) => (VPMULLW512 ...)
+(MulInt32x4 ...) => (VPMULLD128 ...)
+(MulInt32x8 ...) => (VPMULLD256 ...)
+(MulInt32x16 ...) => (VPMULLD512 ...)
+(MulInt64x2 ...) => (VPMULLQ128 ...)
+(MulInt64x4 ...) => (VPMULLQ256 ...)
+(MulInt64x8 ...) => (VPMULLQ512 ...)
(MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...)
(MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...)
(MulEvenWidenInt64x2 ...) => (VPMULDQ128 ...)
(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulLowInt16x8 ...) => (VPMULLW128 ...)
-(MulLowInt16x16 ...) => (VPMULLW256 ...)
-(MulLowInt16x32 ...) => (VPMULLW512 ...)
-(MulLowInt32x4 ...) => (VPMULLD128 ...)
-(MulLowInt32x8 ...) => (VPMULLD256 ...)
-(MulLowInt32x16 ...) => (VPMULLD512 ...)
-(MulLowInt64x2 ...) => (VPMULLQ128 ...)
-(MulLowInt64x4 ...) => (VPMULLQ256 ...)
-(MulLowInt64x8 ...) => (VPMULLQ512 ...)
-(MulLowMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulLowMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulLowMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulLowMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulLowMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulLowMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulLowMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulLowMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulLowMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(MulMaskedFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulMaskedFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulMaskedFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MulMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(MulMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(MulMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(MulMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MulMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MulMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
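The former MulLow* ops are renamed to Mul* above: VPMULLW/VPMULLD/VPMULLQ keep only the low half of each widened product, which for fixed-width integers is exactly the ordinary element-wise multiply. A scalar sketch of one 16-bit lane (illustrative, not generated code):

package main

import "fmt"

// mulInt16 mirrors one VPMULLW lane: the 32-bit product is truncated to
// 16 bits, which is the same result as Go's int16 multiplication.
func mulInt16(x, y int16) int16 {
	return int16(int32(x) * int32(y)) // keep the low half only
}

func main() {
	var x, y int16 = 300, 300
	fmt.Println(mulInt16(x, y), x*y) // 24464 24464
}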
(NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y)
(NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y)
(NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
(PairDotProdMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(PairDotProdMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(PairDotProdMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(PairwiseAddFloat32x4 ...) => (VHADDPS128 ...)
-(PairwiseAddFloat32x8 ...) => (VHADDPS256 ...)
-(PairwiseAddFloat64x2 ...) => (VHADDPD128 ...)
-(PairwiseAddFloat64x4 ...) => (VHADDPD256 ...)
-(PairwiseAddInt16x8 ...) => (VPHADDW128 ...)
-(PairwiseAddInt16x16 ...) => (VPHADDW256 ...)
-(PairwiseAddInt32x4 ...) => (VPHADDD128 ...)
-(PairwiseAddInt32x8 ...) => (VPHADDD256 ...)
-(PairwiseAddUint16x8 ...) => (VPHADDW128 ...)
-(PairwiseAddUint16x16 ...) => (VPHADDW256 ...)
-(PairwiseAddUint32x4 ...) => (VPHADDD128 ...)
-(PairwiseAddUint32x8 ...) => (VPHADDD256 ...)
-(PairwiseSubFloat32x4 ...) => (VHSUBPS128 ...)
-(PairwiseSubFloat32x8 ...) => (VHSUBPS256 ...)
-(PairwiseSubFloat64x2 ...) => (VHSUBPD128 ...)
-(PairwiseSubFloat64x4 ...) => (VHSUBPD256 ...)
-(PairwiseSubInt16x8 ...) => (VPHSUBW128 ...)
-(PairwiseSubInt16x16 ...) => (VPHSUBW256 ...)
-(PairwiseSubInt32x4 ...) => (VPHSUBD128 ...)
-(PairwiseSubInt32x8 ...) => (VPHSUBD256 ...)
-(PairwiseSubUint16x8 ...) => (VPHSUBW128 ...)
-(PairwiseSubUint16x16 ...) => (VPHSUBW256 ...)
-(PairwiseSubUint32x4 ...) => (VPHSUBD128 ...)
-(PairwiseSubUint32x8 ...) => (VPHSUBD256 ...)
(PermuteFloat32x8 ...) => (VPERMPS256 ...)
(PermuteFloat32x16 ...) => (VPERMPS512 ...)
(PermuteFloat64x4 ...) => (VPERMPD256 ...)
(RoundFloat32x8 x) => (VROUNDPS256 [0] x)
(RoundFloat64x2 x) => (VROUNDPD128 [0] x)
(RoundFloat64x4 x) => (VROUNDPD256 [0] x)
-(RoundWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
-(RoundWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
-(RoundWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
-(RoundWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
-(RoundWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
-(RoundWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
-(RoundWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RoundWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RoundWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RoundWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RoundWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RoundWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(SaturatedAddInt8x16 ...) => (VPADDSB128 ...)
-(SaturatedAddInt8x32 ...) => (VPADDSB256 ...)
-(SaturatedAddInt8x64 ...) => (VPADDSB512 ...)
-(SaturatedAddInt16x8 ...) => (VPADDSW128 ...)
-(SaturatedAddInt16x16 ...) => (VPADDSW256 ...)
-(SaturatedAddInt16x32 ...) => (VPADDSW512 ...)
-(SaturatedAddUint8x16 ...) => (VPADDSB128 ...)
-(SaturatedAddUint8x32 ...) => (VPADDSB256 ...)
-(SaturatedAddUint8x64 ...) => (VPADDSB512 ...)
-(SaturatedAddUint16x8 ...) => (VPADDSW128 ...)
-(SaturatedAddUint16x16 ...) => (VPADDSW256 ...)
-(SaturatedAddUint16x32 ...) => (VPADDSW512 ...)
+(RoundScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
+(RoundScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
+(RoundScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
+(RoundScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
+(RoundScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
+(RoundScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
+(RoundScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(RoundScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(RoundScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(RoundScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(RoundScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(RoundScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(RoundScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
+(RoundScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
+(RoundScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
+(RoundScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
+(RoundScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
+(RoundScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
+(RoundScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(RoundScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(RoundScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(RoundScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(RoundScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(RoundScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(SaturatedAddDotProdInt32x4 ...) => (VPDPWSSDS128 ...)
(SaturatedAddDotProdInt32x8 ...) => (VPDPWSSDS256 ...)
(SaturatedAddDotProdInt32x16 ...) => (VPDPWSSDS512 ...)
(SaturatedAddDotProdMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(SaturatedAddDotProdMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(SaturatedAddDotProdMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SaturatedAddMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SaturatedAddMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SaturatedAddMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SaturatedAddMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SaturatedAddMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SaturatedAddMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SaturatedAddMaskedUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SaturatedAddMaskedUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SaturatedAddMaskedUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SaturatedAddMaskedUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SaturatedAddMaskedUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SaturatedAddMaskedUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SaturatedPairwiseAddInt16x8 ...) => (VPHADDSW128 ...)
-(SaturatedPairwiseAddInt16x16 ...) => (VPHADDSW256 ...)
-(SaturatedPairwiseSubInt16x8 ...) => (VPHSUBSW128 ...)
-(SaturatedPairwiseSubInt16x16 ...) => (VPHSUBSW256 ...)
-(SaturatedSubInt8x16 ...) => (VPSUBSB128 ...)
-(SaturatedSubInt8x32 ...) => (VPSUBSB256 ...)
-(SaturatedSubInt8x64 ...) => (VPSUBSB512 ...)
-(SaturatedSubInt16x8 ...) => (VPSUBSW128 ...)
-(SaturatedSubInt16x16 ...) => (VPSUBSW256 ...)
-(SaturatedSubInt16x32 ...) => (VPSUBSW512 ...)
-(SaturatedSubUint8x16 ...) => (VPSUBSB128 ...)
-(SaturatedSubUint8x32 ...) => (VPSUBSB256 ...)
-(SaturatedSubUint8x64 ...) => (VPSUBSB512 ...)
-(SaturatedSubUint16x8 ...) => (VPSUBSW128 ...)
-(SaturatedSubUint16x16 ...) => (VPSUBSW256 ...)
-(SaturatedSubUint16x32 ...) => (VPSUBSW512 ...)
-(SaturatedSubMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SaturatedSubMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SaturatedSubMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SaturatedSubMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SaturatedSubMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SaturatedSubMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SaturatedSubMaskedUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SaturatedSubMaskedUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SaturatedSubMaskedUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SaturatedSubMaskedUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SaturatedSubMaskedUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SaturatedSubMaskedUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
(SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
+(ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
+(ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
+(ScaleFloat64x2 ...) => (VSCALEFPD128 ...)
+(ScaleFloat64x4 ...) => (VSCALEFPD256 ...)
+(ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
+(ScaleMaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ScaleMaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ScaleMaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
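The former MulByPowOf2* ops are renamed to Scale* above, matching the VSCALEF semantics: each element of x is multiplied by 2 raised to floor(y). A one-lane sketch using math.Ldexp (illustrative only; the instruction's NaN/Inf special cases are omitted):

package main

import (
	"fmt"
	"math"
)

// scale mirrors one VSCALEFPD lane: x * 2^floor(y).
func scale(x, y float64) float64 {
	return math.Ldexp(x, int(math.Floor(y)))
}

func main() {
	fmt.Println(scale(3, 2.7)) // 12: 3 * 2^floor(2.7)
}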
(Set128Float32x8 ...) => (VINSERTF128256 ...)
(Set128Float64x4 ...) => (VINSERTF128256 ...)
(Set128Int8x32 ...) => (VINSERTI128256 ...)
(SubMaskedUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(SubMaskedUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(SubMaskedUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(SubPairsFloat32x4 ...) => (VHSUBPS128 ...)
+(SubPairsFloat32x8 ...) => (VHSUBPS256 ...)
+(SubPairsFloat64x2 ...) => (VHSUBPD128 ...)
+(SubPairsFloat64x4 ...) => (VHSUBPD256 ...)
+(SubPairsInt16x8 ...) => (VPHSUBW128 ...)
+(SubPairsInt16x16 ...) => (VPHSUBW256 ...)
+(SubPairsInt32x4 ...) => (VPHSUBD128 ...)
+(SubPairsInt32x8 ...) => (VPHSUBD256 ...)
+(SubPairsUint16x8 ...) => (VPHSUBW128 ...)
+(SubPairsUint16x16 ...) => (VPHSUBW256 ...)
+(SubPairsUint32x4 ...) => (VPHSUBD128 ...)
+(SubPairsUint32x8 ...) => (VPHSUBD256 ...)
+(SubPairsSaturatedInt16x8 ...) => (VPHSUBSW128 ...)
+(SubPairsSaturatedInt16x16 ...) => (VPHSUBSW256 ...)
+(SubSaturatedInt8x16 ...) => (VPSUBSB128 ...)
+(SubSaturatedInt8x32 ...) => (VPSUBSB256 ...)
+(SubSaturatedInt8x64 ...) => (VPSUBSB512 ...)
+(SubSaturatedInt16x8 ...) => (VPSUBSW128 ...)
+(SubSaturatedInt16x16 ...) => (VPSUBSW256 ...)
+(SubSaturatedInt16x32 ...) => (VPSUBSW512 ...)
+(SubSaturatedUint8x16 ...) => (VPSUBSB128 ...)
+(SubSaturatedUint8x32 ...) => (VPSUBSB256 ...)
+(SubSaturatedUint8x64 ...) => (VPSUBSB512 ...)
+(SubSaturatedUint16x8 ...) => (VPSUBSW128 ...)
+(SubSaturatedUint16x16 ...) => (VPSUBSW256 ...)
+(SubSaturatedUint16x32 ...) => (VPSUBSW512 ...)
+(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
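The AddSaturated*/SubSaturated* lowerings above (VPADDS*/VPSUBS*) clamp the result to the element's representable range instead of wrapping. A scalar int8 sketch of that clamping (illustrative, not generated code):

package main

import (
	"fmt"
	"math"
)

// addSatInt8 mirrors one VPADDSB lane: the sum is clamped to
// [math.MinInt8, math.MaxInt8] rather than wrapping around.
func addSatInt8(x, y int8) int8 {
	s := int16(x) + int16(y)
	if s > math.MaxInt8 {
		return math.MaxInt8
	}
	if s < math.MinInt8 {
		return math.MinInt8
	}
	return int8(s)
}

func main() {
	fmt.Println(addSatInt8(120, 20))   // 127, not the wrapped -116
	fmt.Println(addSatInt8(-120, -20)) // -128, not the wrapped 116
}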
(TruncFloat32x4 x) => (VROUNDPS128 [3] x)
(TruncFloat32x8 x) => (VROUNDPS256 [3] x)
(TruncFloat64x2 x) => (VROUNDPD128 [3] x)
(TruncFloat64x4 x) => (VROUNDPD256 [3] x)
-(TruncWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+3] x)
-(TruncWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+3] x)
-(TruncWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x)
-(TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
-(TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
-(TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
-(TruncWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(TruncWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(TruncWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(TruncWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(TruncWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(TruncWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(TruncScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+3] x)
+(TruncScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+3] x)
+(TruncScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x)
+(TruncScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
+(TruncScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
+(TruncScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
+(TruncScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(TruncScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(TruncScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(TruncScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(TruncScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(TruncScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(TruncScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
+(TruncScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
+(TruncScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
+(TruncScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
+(TruncScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
+(TruncScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
+(TruncScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(TruncScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(TruncScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
{name: "AddMaskedUint64x2", argLength: 3, commutative: true},
{name: "AddMaskedUint64x4", argLength: 3, commutative: true},
{name: "AddMaskedUint64x8", argLength: 3, commutative: true},
+ {name: "AddPairsFloat32x4", argLength: 2, commutative: false},
+ {name: "AddPairsFloat32x8", argLength: 2, commutative: false},
+ {name: "AddPairsFloat64x2", argLength: 2, commutative: false},
+ {name: "AddPairsFloat64x4", argLength: 2, commutative: false},
+ {name: "AddPairsInt16x8", argLength: 2, commutative: false},
+ {name: "AddPairsInt16x16", argLength: 2, commutative: false},
+ {name: "AddPairsInt32x4", argLength: 2, commutative: false},
+ {name: "AddPairsInt32x8", argLength: 2, commutative: false},
+ {name: "AddPairsSaturatedInt16x8", argLength: 2, commutative: false},
+ {name: "AddPairsSaturatedInt16x16", argLength: 2, commutative: false},
+ {name: "AddPairsUint16x8", argLength: 2, commutative: false},
+ {name: "AddPairsUint16x16", argLength: 2, commutative: false},
+ {name: "AddPairsUint32x4", argLength: 2, commutative: false},
+ {name: "AddPairsUint32x8", argLength: 2, commutative: false},
+ {name: "AddSaturatedInt8x16", argLength: 2, commutative: true},
+ {name: "AddSaturatedInt8x32", argLength: 2, commutative: true},
+ {name: "AddSaturatedInt8x64", argLength: 2, commutative: true},
+ {name: "AddSaturatedInt16x8", argLength: 2, commutative: true},
+ {name: "AddSaturatedInt16x16", argLength: 2, commutative: true},
+ {name: "AddSaturatedInt16x32", argLength: 2, commutative: true},
+ {name: "AddSaturatedMaskedInt8x16", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedInt8x32", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedInt8x64", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedInt16x8", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedInt16x16", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedInt16x32", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedUint8x16", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedUint8x32", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedUint8x64", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedUint16x8", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedUint16x16", argLength: 3, commutative: true},
+ {name: "AddSaturatedMaskedUint16x32", argLength: 3, commutative: true},
+ {name: "AddSaturatedUint8x16", argLength: 2, commutative: true},
+ {name: "AddSaturatedUint8x32", argLength: 2, commutative: true},
+ {name: "AddSaturatedUint8x64", argLength: 2, commutative: true},
+ {name: "AddSaturatedUint16x8", argLength: 2, commutative: true},
+ {name: "AddSaturatedUint16x16", argLength: 2, commutative: true},
+ {name: "AddSaturatedUint16x32", argLength: 2, commutative: true},
{name: "AddSubFloat32x4", argLength: 2, commutative: false},
{name: "AddSubFloat32x8", argLength: 2, commutative: false},
{name: "AddSubFloat64x2", argLength: 2, commutative: false},
{name: "MinUint64x2", argLength: 2, commutative: true},
{name: "MinUint64x4", argLength: 2, commutative: true},
{name: "MinUint64x8", argLength: 2, commutative: true},
- {name: "MulByPowOf2Float32x4", argLength: 2, commutative: false},
- {name: "MulByPowOf2Float32x8", argLength: 2, commutative: false},
- {name: "MulByPowOf2Float32x16", argLength: 2, commutative: false},
- {name: "MulByPowOf2Float64x2", argLength: 2, commutative: false},
- {name: "MulByPowOf2Float64x4", argLength: 2, commutative: false},
- {name: "MulByPowOf2Float64x8", argLength: 2, commutative: false},
- {name: "MulByPowOf2MaskedFloat32x4", argLength: 3, commutative: false},
- {name: "MulByPowOf2MaskedFloat32x8", argLength: 3, commutative: false},
- {name: "MulByPowOf2MaskedFloat32x16", argLength: 3, commutative: false},
- {name: "MulByPowOf2MaskedFloat64x2", argLength: 3, commutative: false},
- {name: "MulByPowOf2MaskedFloat64x4", argLength: 3, commutative: false},
- {name: "MulByPowOf2MaskedFloat64x8", argLength: 3, commutative: false},
{name: "MulEvenWidenInt32x4", argLength: 2, commutative: true},
{name: "MulEvenWidenInt32x8", argLength: 2, commutative: true},
{name: "MulEvenWidenInt64x2", argLength: 2, commutative: true},
{name: "MulHighUint16x8", argLength: 2, commutative: true},
{name: "MulHighUint16x16", argLength: 2, commutative: true},
{name: "MulHighUint16x32", argLength: 2, commutative: true},
- {name: "MulLowInt16x8", argLength: 2, commutative: true},
- {name: "MulLowInt16x16", argLength: 2, commutative: true},
- {name: "MulLowInt16x32", argLength: 2, commutative: true},
- {name: "MulLowInt32x4", argLength: 2, commutative: true},
- {name: "MulLowInt32x8", argLength: 2, commutative: true},
- {name: "MulLowInt32x16", argLength: 2, commutative: true},
- {name: "MulLowInt64x2", argLength: 2, commutative: true},
- {name: "MulLowInt64x4", argLength: 2, commutative: true},
- {name: "MulLowInt64x8", argLength: 2, commutative: true},
- {name: "MulLowMaskedInt16x8", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt16x16", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt16x32", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt32x4", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt32x8", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt32x16", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt64x2", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt64x4", argLength: 3, commutative: true},
- {name: "MulLowMaskedInt64x8", argLength: 3, commutative: true},
+ {name: "MulInt16x8", argLength: 2, commutative: true},
+ {name: "MulInt16x16", argLength: 2, commutative: true},
+ {name: "MulInt16x32", argLength: 2, commutative: true},
+ {name: "MulInt32x4", argLength: 2, commutative: true},
+ {name: "MulInt32x8", argLength: 2, commutative: true},
+ {name: "MulInt32x16", argLength: 2, commutative: true},
+ {name: "MulInt64x2", argLength: 2, commutative: true},
+ {name: "MulInt64x4", argLength: 2, commutative: true},
+ {name: "MulInt64x8", argLength: 2, commutative: true},
{name: "MulMaskedFloat32x4", argLength: 3, commutative: true},
{name: "MulMaskedFloat32x8", argLength: 3, commutative: true},
{name: "MulMaskedFloat32x16", argLength: 3, commutative: true},
{name: "MulMaskedFloat64x2", argLength: 3, commutative: true},
{name: "MulMaskedFloat64x4", argLength: 3, commutative: true},
{name: "MulMaskedFloat64x8", argLength: 3, commutative: true},
+ {name: "MulMaskedInt16x8", argLength: 3, commutative: true},
+ {name: "MulMaskedInt16x16", argLength: 3, commutative: true},
+ {name: "MulMaskedInt16x32", argLength: 3, commutative: true},
+ {name: "MulMaskedInt32x4", argLength: 3, commutative: true},
+ {name: "MulMaskedInt32x8", argLength: 3, commutative: true},
+ {name: "MulMaskedInt32x16", argLength: 3, commutative: true},
+ {name: "MulMaskedInt64x2", argLength: 3, commutative: true},
+ {name: "MulMaskedInt64x4", argLength: 3, commutative: true},
+ {name: "MulMaskedInt64x8", argLength: 3, commutative: true},
{name: "NotEqualFloat32x4", argLength: 2, commutative: true},
{name: "NotEqualFloat32x8", argLength: 2, commutative: true},
{name: "NotEqualFloat32x16", argLength: 2, commutative: true},
{name: "PairDotProdMaskedInt16x8", argLength: 3, commutative: false},
{name: "PairDotProdMaskedInt16x16", argLength: 3, commutative: false},
{name: "PairDotProdMaskedInt16x32", argLength: 3, commutative: false},
- {name: "PairwiseAddFloat32x4", argLength: 2, commutative: false},
- {name: "PairwiseAddFloat32x8", argLength: 2, commutative: false},
- {name: "PairwiseAddFloat64x2", argLength: 2, commutative: false},
- {name: "PairwiseAddFloat64x4", argLength: 2, commutative: false},
- {name: "PairwiseAddInt16x8", argLength: 2, commutative: false},
- {name: "PairwiseAddInt16x16", argLength: 2, commutative: false},
- {name: "PairwiseAddInt32x4", argLength: 2, commutative: false},
- {name: "PairwiseAddInt32x8", argLength: 2, commutative: false},
- {name: "PairwiseAddUint16x8", argLength: 2, commutative: false},
- {name: "PairwiseAddUint16x16", argLength: 2, commutative: false},
- {name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
- {name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
- {name: "PairwiseSubFloat32x4", argLength: 2, commutative: false},
- {name: "PairwiseSubFloat32x8", argLength: 2, commutative: false},
- {name: "PairwiseSubFloat64x2", argLength: 2, commutative: false},
- {name: "PairwiseSubFloat64x4", argLength: 2, commutative: false},
- {name: "PairwiseSubInt16x8", argLength: 2, commutative: false},
- {name: "PairwiseSubInt16x16", argLength: 2, commutative: false},
- {name: "PairwiseSubInt32x4", argLength: 2, commutative: false},
- {name: "PairwiseSubInt32x8", argLength: 2, commutative: false},
- {name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
- {name: "PairwiseSubUint16x16", argLength: 2, commutative: false},
- {name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
- {name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
{name: "Permute2Float32x4", argLength: 3, commutative: false},
{name: "Permute2Float32x8", argLength: 3, commutative: false},
{name: "Permute2Float32x16", argLength: 3, commutative: false},
{name: "SaturatedAddDotProdMaskedInt32x4", argLength: 4, commutative: false},
{name: "SaturatedAddDotProdMaskedInt32x8", argLength: 4, commutative: false},
{name: "SaturatedAddDotProdMaskedInt32x16", argLength: 4, commutative: false},
- {name: "SaturatedAddInt8x16", argLength: 2, commutative: true},
- {name: "SaturatedAddInt8x32", argLength: 2, commutative: true},
- {name: "SaturatedAddInt8x64", argLength: 2, commutative: true},
- {name: "SaturatedAddInt16x8", argLength: 2, commutative: true},
- {name: "SaturatedAddInt16x16", argLength: 2, commutative: true},
- {name: "SaturatedAddInt16x32", argLength: 2, commutative: true},
- {name: "SaturatedAddMaskedInt8x16", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedInt8x32", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedInt8x64", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedInt16x8", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedInt16x16", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedInt16x32", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedUint8x16", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedUint8x32", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedUint8x64", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedUint16x8", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedUint16x16", argLength: 3, commutative: true},
- {name: "SaturatedAddMaskedUint16x32", argLength: 3, commutative: true},
- {name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
- {name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
- {name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
- {name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
- {name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
- {name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
- {name: "SaturatedPairwiseAddInt16x8", argLength: 2, commutative: false},
- {name: "SaturatedPairwiseAddInt16x16", argLength: 2, commutative: false},
- {name: "SaturatedPairwiseSubInt16x8", argLength: 2, commutative: false},
- {name: "SaturatedPairwiseSubInt16x16", argLength: 2, commutative: false},
- {name: "SaturatedSubInt8x16", argLength: 2, commutative: false},
- {name: "SaturatedSubInt8x32", argLength: 2, commutative: false},
- {name: "SaturatedSubInt8x64", argLength: 2, commutative: false},
- {name: "SaturatedSubInt16x8", argLength: 2, commutative: false},
- {name: "SaturatedSubInt16x16", argLength: 2, commutative: false},
- {name: "SaturatedSubInt16x32", argLength: 2, commutative: false},
- {name: "SaturatedSubMaskedInt8x16", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedInt8x32", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedInt8x64", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedInt16x8", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedInt16x16", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedInt16x32", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedUint8x16", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedUint8x32", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedUint8x64", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedUint16x8", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedUint16x16", argLength: 3, commutative: false},
- {name: "SaturatedSubMaskedUint16x32", argLength: 3, commutative: false},
- {name: "SaturatedSubUint8x16", argLength: 2, commutative: false},
- {name: "SaturatedSubUint8x32", argLength: 2, commutative: false},
- {name: "SaturatedSubUint8x64", argLength: 2, commutative: false},
- {name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
- {name: "SaturatedSubUint16x16", argLength: 2, commutative: false},
- {name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64", argLength: 3, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false},
{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
+ {name: "ScaleFloat32x4", argLength: 2, commutative: false},
+ {name: "ScaleFloat32x8", argLength: 2, commutative: false},
+ {name: "ScaleFloat32x16", argLength: 2, commutative: false},
+ {name: "ScaleFloat64x2", argLength: 2, commutative: false},
+ {name: "ScaleFloat64x4", argLength: 2, commutative: false},
+ {name: "ScaleFloat64x8", argLength: 2, commutative: false},
+ {name: "ScaleMaskedFloat32x4", argLength: 3, commutative: false},
+ {name: "ScaleMaskedFloat32x8", argLength: 3, commutative: false},
+ {name: "ScaleMaskedFloat32x16", argLength: 3, commutative: false},
+ {name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false},
+ {name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false},
+ {name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false},
{name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false},
{name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false},
{name: "ShiftAllLeftInt16x32", argLength: 2, commutative: false},
{name: "SubMaskedUint64x2", argLength: 3, commutative: false},
{name: "SubMaskedUint64x4", argLength: 3, commutative: false},
{name: "SubMaskedUint64x8", argLength: 3, commutative: false},
+ {name: "SubPairsFloat32x4", argLength: 2, commutative: false},
+ {name: "SubPairsFloat32x8", argLength: 2, commutative: false},
+ {name: "SubPairsFloat64x2", argLength: 2, commutative: false},
+ {name: "SubPairsFloat64x4", argLength: 2, commutative: false},
+ {name: "SubPairsInt16x8", argLength: 2, commutative: false},
+ {name: "SubPairsInt16x16", argLength: 2, commutative: false},
+ {name: "SubPairsInt32x4", argLength: 2, commutative: false},
+ {name: "SubPairsInt32x8", argLength: 2, commutative: false},
+ {name: "SubPairsSaturatedInt16x8", argLength: 2, commutative: false},
+ {name: "SubPairsSaturatedInt16x16", argLength: 2, commutative: false},
+ {name: "SubPairsUint16x8", argLength: 2, commutative: false},
+ {name: "SubPairsUint16x16", argLength: 2, commutative: false},
+ {name: "SubPairsUint32x4", argLength: 2, commutative: false},
+ {name: "SubPairsUint32x8", argLength: 2, commutative: false},
+ {name: "SubSaturatedInt8x16", argLength: 2, commutative: false},
+ {name: "SubSaturatedInt8x32", argLength: 2, commutative: false},
+ {name: "SubSaturatedInt8x64", argLength: 2, commutative: false},
+ {name: "SubSaturatedInt16x8", argLength: 2, commutative: false},
+ {name: "SubSaturatedInt16x16", argLength: 2, commutative: false},
+ {name: "SubSaturatedInt16x32", argLength: 2, commutative: false},
+ {name: "SubSaturatedMaskedInt8x16", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedInt8x32", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedInt8x64", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedInt16x8", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedInt16x16", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedInt16x32", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedUint8x16", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedUint8x32", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedUint8x64", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedUint16x8", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedUint16x16", argLength: 3, commutative: false},
+ {name: "SubSaturatedMaskedUint16x32", argLength: 3, commutative: false},
+ {name: "SubSaturatedUint8x16", argLength: 2, commutative: false},
+ {name: "SubSaturatedUint8x32", argLength: 2, commutative: false},
+ {name: "SubSaturatedUint8x64", argLength: 2, commutative: false},
+ {name: "SubSaturatedUint16x8", argLength: 2, commutative: false},
+ {name: "SubSaturatedUint16x16", argLength: 2, commutative: false},
+ {name: "SubSaturatedUint16x32", argLength: 2, commutative: false},
{name: "SubUint8x16", argLength: 2, commutative: false},
{name: "SubUint8x32", argLength: 2, commutative: false},
{name: "SubUint8x64", argLength: 2, commutative: false},
{name: "XorUint64x2", argLength: 2, commutative: true},
{name: "XorUint64x4", argLength: 2, commutative: true},
{name: "XorUint64x8", argLength: 2, commutative: true},
- {name: "CeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "CeilWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithCeilWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithFloorWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithRoundWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "DiffWithTruncWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "FloorWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "CeilScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "FloorScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformInverseMaskedUint8x16", argLength: 3, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformInverseMaskedUint8x32", argLength: 3, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformInverseMaskedUint8x64", argLength: 3, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "Set128Float32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "Set128Float64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightConcatUint64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightConcatUint64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightConcatUint64x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "TruncWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "TruncScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
}
}
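For reference, the generic op declarations above feed the SSA op generator, which emits the Op constants and opInfo table entries that follow. Each record's argLength counts value arguments (the Masked variants take one extra mask argument), commutative marks ops whose two inputs may be swapped, and the Scaled ops carry their rounding-precision immediate in an Int8 aux. A minimal, self-contained sketch of that shape, using a hypothetical genericOp struct rather than the generator's real types:

package main

import "fmt"

// genericOp is a hypothetical stand-in for the generator's op record,
// keeping only the fields exercised by the declarations in this diff.
type genericOp struct {
	name        string
	argLength   int  // value arguments; masked variants add one mask argument
	commutative bool // the two value inputs may be swapped
	aux         string // e.g. "Int8" for the *Scaled rounding immediate
}

func main() {
	ops := []genericOp{
		{name: "AddSaturatedInt8x16", argLength: 2, commutative: true},
		{name: "SubSaturatedMaskedInt16x8", argLength: 3}, // value, value, mask
		{name: "CeilScaledFloat32x4", argLength: 1, aux: "Int8"},
	}
	for _, op := range ops {
		fmt.Printf("%-28s args=%d commutative=%v aux=%q\n",
			op.name, op.argLength, op.commutative, op.aux)
	}
}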
OpAddMaskedUint64x2
OpAddMaskedUint64x4
OpAddMaskedUint64x8
+ OpAddPairsFloat32x4
+ OpAddPairsFloat32x8
+ OpAddPairsFloat64x2
+ OpAddPairsFloat64x4
+ OpAddPairsInt16x8
+ OpAddPairsInt16x16
+ OpAddPairsInt32x4
+ OpAddPairsInt32x8
+ OpAddPairsSaturatedInt16x8
+ OpAddPairsSaturatedInt16x16
+ OpAddPairsUint16x8
+ OpAddPairsUint16x16
+ OpAddPairsUint32x4
+ OpAddPairsUint32x8
+ OpAddSaturatedInt8x16
+ OpAddSaturatedInt8x32
+ OpAddSaturatedInt8x64
+ OpAddSaturatedInt16x8
+ OpAddSaturatedInt16x16
+ OpAddSaturatedInt16x32
+ OpAddSaturatedMaskedInt8x16
+ OpAddSaturatedMaskedInt8x32
+ OpAddSaturatedMaskedInt8x64
+ OpAddSaturatedMaskedInt16x8
+ OpAddSaturatedMaskedInt16x16
+ OpAddSaturatedMaskedInt16x32
+ OpAddSaturatedMaskedUint8x16
+ OpAddSaturatedMaskedUint8x32
+ OpAddSaturatedMaskedUint8x64
+ OpAddSaturatedMaskedUint16x8
+ OpAddSaturatedMaskedUint16x16
+ OpAddSaturatedMaskedUint16x32
+ OpAddSaturatedUint8x16
+ OpAddSaturatedUint8x32
+ OpAddSaturatedUint8x64
+ OpAddSaturatedUint16x8
+ OpAddSaturatedUint16x16
+ OpAddSaturatedUint16x32
OpAddSubFloat32x4
OpAddSubFloat32x8
OpAddSubFloat64x2
OpMinUint64x2
OpMinUint64x4
OpMinUint64x8
- OpMulByPowOf2Float32x4
- OpMulByPowOf2Float32x8
- OpMulByPowOf2Float32x16
- OpMulByPowOf2Float64x2
- OpMulByPowOf2Float64x4
- OpMulByPowOf2Float64x8
- OpMulByPowOf2MaskedFloat32x4
- OpMulByPowOf2MaskedFloat32x8
- OpMulByPowOf2MaskedFloat32x16
- OpMulByPowOf2MaskedFloat64x2
- OpMulByPowOf2MaskedFloat64x4
- OpMulByPowOf2MaskedFloat64x8
OpMulEvenWidenInt32x4
OpMulEvenWidenInt32x8
OpMulEvenWidenInt64x2
OpMulHighUint16x8
OpMulHighUint16x16
OpMulHighUint16x32
- OpMulLowInt16x8
- OpMulLowInt16x16
- OpMulLowInt16x32
- OpMulLowInt32x4
- OpMulLowInt32x8
- OpMulLowInt32x16
- OpMulLowInt64x2
- OpMulLowInt64x4
- OpMulLowInt64x8
- OpMulLowMaskedInt16x8
- OpMulLowMaskedInt16x16
- OpMulLowMaskedInt16x32
- OpMulLowMaskedInt32x4
- OpMulLowMaskedInt32x8
- OpMulLowMaskedInt32x16
- OpMulLowMaskedInt64x2
- OpMulLowMaskedInt64x4
- OpMulLowMaskedInt64x8
+ OpMulInt16x8
+ OpMulInt16x16
+ OpMulInt16x32
+ OpMulInt32x4
+ OpMulInt32x8
+ OpMulInt32x16
+ OpMulInt64x2
+ OpMulInt64x4
+ OpMulInt64x8
OpMulMaskedFloat32x4
OpMulMaskedFloat32x8
OpMulMaskedFloat32x16
OpMulMaskedFloat64x2
OpMulMaskedFloat64x4
OpMulMaskedFloat64x8
+ OpMulMaskedInt16x8
+ OpMulMaskedInt16x16
+ OpMulMaskedInt16x32
+ OpMulMaskedInt32x4
+ OpMulMaskedInt32x8
+ OpMulMaskedInt32x16
+ OpMulMaskedInt64x2
+ OpMulMaskedInt64x4
+ OpMulMaskedInt64x8
OpNotEqualFloat32x4
OpNotEqualFloat32x8
OpNotEqualFloat32x16
OpPairDotProdMaskedInt16x8
OpPairDotProdMaskedInt16x16
OpPairDotProdMaskedInt16x32
- OpPairwiseAddFloat32x4
- OpPairwiseAddFloat32x8
- OpPairwiseAddFloat64x2
- OpPairwiseAddFloat64x4
- OpPairwiseAddInt16x8
- OpPairwiseAddInt16x16
- OpPairwiseAddInt32x4
- OpPairwiseAddInt32x8
- OpPairwiseAddUint16x8
- OpPairwiseAddUint16x16
- OpPairwiseAddUint32x4
- OpPairwiseAddUint32x8
- OpPairwiseSubFloat32x4
- OpPairwiseSubFloat32x8
- OpPairwiseSubFloat64x2
- OpPairwiseSubFloat64x4
- OpPairwiseSubInt16x8
- OpPairwiseSubInt16x16
- OpPairwiseSubInt32x4
- OpPairwiseSubInt32x8
- OpPairwiseSubUint16x8
- OpPairwiseSubUint16x16
- OpPairwiseSubUint32x4
- OpPairwiseSubUint32x8
OpPermute2Float32x4
OpPermute2Float32x8
OpPermute2Float32x16
OpSaturatedAddDotProdMaskedInt32x4
OpSaturatedAddDotProdMaskedInt32x8
OpSaturatedAddDotProdMaskedInt32x16
- OpSaturatedAddInt8x16
- OpSaturatedAddInt8x32
- OpSaturatedAddInt8x64
- OpSaturatedAddInt16x8
- OpSaturatedAddInt16x16
- OpSaturatedAddInt16x32
- OpSaturatedAddMaskedInt8x16
- OpSaturatedAddMaskedInt8x32
- OpSaturatedAddMaskedInt8x64
- OpSaturatedAddMaskedInt16x8
- OpSaturatedAddMaskedInt16x16
- OpSaturatedAddMaskedInt16x32
- OpSaturatedAddMaskedUint8x16
- OpSaturatedAddMaskedUint8x32
- OpSaturatedAddMaskedUint8x64
- OpSaturatedAddMaskedUint16x8
- OpSaturatedAddMaskedUint16x16
- OpSaturatedAddMaskedUint16x32
- OpSaturatedAddUint8x16
- OpSaturatedAddUint8x32
- OpSaturatedAddUint8x64
- OpSaturatedAddUint16x8
- OpSaturatedAddUint16x16
- OpSaturatedAddUint16x32
- OpSaturatedPairwiseAddInt16x8
- OpSaturatedPairwiseAddInt16x16
- OpSaturatedPairwiseSubInt16x8
- OpSaturatedPairwiseSubInt16x16
- OpSaturatedSubInt8x16
- OpSaturatedSubInt8x32
- OpSaturatedSubInt8x64
- OpSaturatedSubInt16x8
- OpSaturatedSubInt16x16
- OpSaturatedSubInt16x32
- OpSaturatedSubMaskedInt8x16
- OpSaturatedSubMaskedInt8x32
- OpSaturatedSubMaskedInt8x64
- OpSaturatedSubMaskedInt16x8
- OpSaturatedSubMaskedInt16x16
- OpSaturatedSubMaskedInt16x32
- OpSaturatedSubMaskedUint8x16
- OpSaturatedSubMaskedUint8x32
- OpSaturatedSubMaskedUint8x64
- OpSaturatedSubMaskedUint16x8
- OpSaturatedSubMaskedUint16x16
- OpSaturatedSubMaskedUint16x32
- OpSaturatedSubUint8x16
- OpSaturatedSubUint8x32
- OpSaturatedSubUint8x64
- OpSaturatedSubUint16x8
- OpSaturatedSubUint16x16
- OpSaturatedSubUint16x32
OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16
OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32
OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8
OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16
+ OpScaleFloat32x4
+ OpScaleFloat32x8
+ OpScaleFloat32x16
+ OpScaleFloat64x2
+ OpScaleFloat64x4
+ OpScaleFloat64x8
+ OpScaleMaskedFloat32x4
+ OpScaleMaskedFloat32x8
+ OpScaleMaskedFloat32x16
+ OpScaleMaskedFloat64x2
+ OpScaleMaskedFloat64x4
+ OpScaleMaskedFloat64x8
OpShiftAllLeftInt16x8
OpShiftAllLeftInt16x16
OpShiftAllLeftInt16x32
OpSubMaskedUint64x2
OpSubMaskedUint64x4
OpSubMaskedUint64x8
+ OpSubPairsFloat32x4
+ OpSubPairsFloat32x8
+ OpSubPairsFloat64x2
+ OpSubPairsFloat64x4
+ OpSubPairsInt16x8
+ OpSubPairsInt16x16
+ OpSubPairsInt32x4
+ OpSubPairsInt32x8
+ OpSubPairsSaturatedInt16x8
+ OpSubPairsSaturatedInt16x16
+ OpSubPairsUint16x8
+ OpSubPairsUint16x16
+ OpSubPairsUint32x4
+ OpSubPairsUint32x8
+ OpSubSaturatedInt8x16
+ OpSubSaturatedInt8x32
+ OpSubSaturatedInt8x64
+ OpSubSaturatedInt16x8
+ OpSubSaturatedInt16x16
+ OpSubSaturatedInt16x32
+ OpSubSaturatedMaskedInt8x16
+ OpSubSaturatedMaskedInt8x32
+ OpSubSaturatedMaskedInt8x64
+ OpSubSaturatedMaskedInt16x8
+ OpSubSaturatedMaskedInt16x16
+ OpSubSaturatedMaskedInt16x32
+ OpSubSaturatedMaskedUint8x16
+ OpSubSaturatedMaskedUint8x32
+ OpSubSaturatedMaskedUint8x64
+ OpSubSaturatedMaskedUint16x8
+ OpSubSaturatedMaskedUint16x16
+ OpSubSaturatedMaskedUint16x32
+ OpSubSaturatedUint8x16
+ OpSubSaturatedUint8x32
+ OpSubSaturatedUint8x64
+ OpSubSaturatedUint16x8
+ OpSubSaturatedUint16x16
+ OpSubSaturatedUint16x32
OpSubUint8x16
OpSubUint8x32
OpSubUint8x64
OpXorUint64x2
OpXorUint64x4
OpXorUint64x8
- OpCeilWithPrecisionFloat32x4
- OpCeilWithPrecisionFloat32x8
- OpCeilWithPrecisionFloat32x16
- OpCeilWithPrecisionFloat64x2
- OpCeilWithPrecisionFloat64x4
- OpCeilWithPrecisionFloat64x8
- OpCeilWithPrecisionMaskedFloat32x4
- OpCeilWithPrecisionMaskedFloat32x8
- OpCeilWithPrecisionMaskedFloat32x16
- OpCeilWithPrecisionMaskedFloat64x2
- OpCeilWithPrecisionMaskedFloat64x4
- OpCeilWithPrecisionMaskedFloat64x8
- OpDiffWithCeilWithPrecisionFloat32x4
- OpDiffWithCeilWithPrecisionFloat32x8
- OpDiffWithCeilWithPrecisionFloat32x16
- OpDiffWithCeilWithPrecisionFloat64x2
- OpDiffWithCeilWithPrecisionFloat64x4
- OpDiffWithCeilWithPrecisionFloat64x8
- OpDiffWithCeilWithPrecisionMaskedFloat32x4
- OpDiffWithCeilWithPrecisionMaskedFloat32x8
- OpDiffWithCeilWithPrecisionMaskedFloat32x16
- OpDiffWithCeilWithPrecisionMaskedFloat64x2
- OpDiffWithCeilWithPrecisionMaskedFloat64x4
- OpDiffWithCeilWithPrecisionMaskedFloat64x8
- OpDiffWithFloorWithPrecisionFloat32x4
- OpDiffWithFloorWithPrecisionFloat32x8
- OpDiffWithFloorWithPrecisionFloat32x16
- OpDiffWithFloorWithPrecisionFloat64x2
- OpDiffWithFloorWithPrecisionFloat64x4
- OpDiffWithFloorWithPrecisionFloat64x8
- OpDiffWithFloorWithPrecisionMaskedFloat32x4
- OpDiffWithFloorWithPrecisionMaskedFloat32x8
- OpDiffWithFloorWithPrecisionMaskedFloat32x16
- OpDiffWithFloorWithPrecisionMaskedFloat64x2
- OpDiffWithFloorWithPrecisionMaskedFloat64x4
- OpDiffWithFloorWithPrecisionMaskedFloat64x8
- OpDiffWithRoundWithPrecisionFloat32x4
- OpDiffWithRoundWithPrecisionFloat32x8
- OpDiffWithRoundWithPrecisionFloat32x16
- OpDiffWithRoundWithPrecisionFloat64x2
- OpDiffWithRoundWithPrecisionFloat64x4
- OpDiffWithRoundWithPrecisionFloat64x8
- OpDiffWithRoundWithPrecisionMaskedFloat32x4
- OpDiffWithRoundWithPrecisionMaskedFloat32x8
- OpDiffWithRoundWithPrecisionMaskedFloat32x16
- OpDiffWithRoundWithPrecisionMaskedFloat64x2
- OpDiffWithRoundWithPrecisionMaskedFloat64x4
- OpDiffWithRoundWithPrecisionMaskedFloat64x8
- OpDiffWithTruncWithPrecisionFloat32x4
- OpDiffWithTruncWithPrecisionFloat32x8
- OpDiffWithTruncWithPrecisionFloat32x16
- OpDiffWithTruncWithPrecisionFloat64x2
- OpDiffWithTruncWithPrecisionFloat64x4
- OpDiffWithTruncWithPrecisionFloat64x8
- OpDiffWithTruncWithPrecisionMaskedFloat32x4
- OpDiffWithTruncWithPrecisionMaskedFloat32x8
- OpDiffWithTruncWithPrecisionMaskedFloat32x16
- OpDiffWithTruncWithPrecisionMaskedFloat64x2
- OpDiffWithTruncWithPrecisionMaskedFloat64x4
- OpDiffWithTruncWithPrecisionMaskedFloat64x8
- OpFloorWithPrecisionFloat32x4
- OpFloorWithPrecisionFloat32x8
- OpFloorWithPrecisionFloat32x16
- OpFloorWithPrecisionFloat64x2
- OpFloorWithPrecisionFloat64x4
- OpFloorWithPrecisionFloat64x8
- OpFloorWithPrecisionMaskedFloat32x4
- OpFloorWithPrecisionMaskedFloat32x8
- OpFloorWithPrecisionMaskedFloat32x16
- OpFloorWithPrecisionMaskedFloat64x2
- OpFloorWithPrecisionMaskedFloat64x4
- OpFloorWithPrecisionMaskedFloat64x8
+ OpCeilScaledFloat32x4
+ OpCeilScaledFloat32x8
+ OpCeilScaledFloat32x16
+ OpCeilScaledFloat64x2
+ OpCeilScaledFloat64x4
+ OpCeilScaledFloat64x8
+ OpCeilScaledMaskedFloat32x4
+ OpCeilScaledMaskedFloat32x8
+ OpCeilScaledMaskedFloat32x16
+ OpCeilScaledMaskedFloat64x2
+ OpCeilScaledMaskedFloat64x4
+ OpCeilScaledMaskedFloat64x8
+ OpCeilScaledResidueFloat32x4
+ OpCeilScaledResidueFloat32x8
+ OpCeilScaledResidueFloat32x16
+ OpCeilScaledResidueFloat64x2
+ OpCeilScaledResidueFloat64x4
+ OpCeilScaledResidueFloat64x8
+ OpCeilScaledResidueMaskedFloat32x4
+ OpCeilScaledResidueMaskedFloat32x8
+ OpCeilScaledResidueMaskedFloat32x16
+ OpCeilScaledResidueMaskedFloat64x2
+ OpCeilScaledResidueMaskedFloat64x4
+ OpCeilScaledResidueMaskedFloat64x8
+ OpFloorScaledFloat32x4
+ OpFloorScaledFloat32x8
+ OpFloorScaledFloat32x16
+ OpFloorScaledFloat64x2
+ OpFloorScaledFloat64x4
+ OpFloorScaledFloat64x8
+ OpFloorScaledMaskedFloat32x4
+ OpFloorScaledMaskedFloat32x8
+ OpFloorScaledMaskedFloat32x16
+ OpFloorScaledMaskedFloat64x2
+ OpFloorScaledMaskedFloat64x4
+ OpFloorScaledMaskedFloat64x8
+ OpFloorScaledResidueFloat32x4
+ OpFloorScaledResidueFloat32x8
+ OpFloorScaledResidueFloat32x16
+ OpFloorScaledResidueFloat64x2
+ OpFloorScaledResidueFloat64x4
+ OpFloorScaledResidueFloat64x8
+ OpFloorScaledResidueMaskedFloat32x4
+ OpFloorScaledResidueMaskedFloat32x8
+ OpFloorScaledResidueMaskedFloat32x16
+ OpFloorScaledResidueMaskedFloat64x2
+ OpFloorScaledResidueMaskedFloat64x4
+ OpFloorScaledResidueMaskedFloat64x8
OpGaloisFieldAffineTransformInverseMaskedUint8x16
OpGaloisFieldAffineTransformInverseMaskedUint8x32
OpGaloisFieldAffineTransformInverseMaskedUint8x64
OpRotateAllRightUint64x2
OpRotateAllRightUint64x4
OpRotateAllRightUint64x8
- OpRoundWithPrecisionFloat32x4
- OpRoundWithPrecisionFloat32x8
- OpRoundWithPrecisionFloat32x16
- OpRoundWithPrecisionFloat64x2
- OpRoundWithPrecisionFloat64x4
- OpRoundWithPrecisionFloat64x8
- OpRoundWithPrecisionMaskedFloat32x4
- OpRoundWithPrecisionMaskedFloat32x8
- OpRoundWithPrecisionMaskedFloat32x16
- OpRoundWithPrecisionMaskedFloat64x2
- OpRoundWithPrecisionMaskedFloat64x4
- OpRoundWithPrecisionMaskedFloat64x8
+ OpRoundScaledFloat32x4
+ OpRoundScaledFloat32x8
+ OpRoundScaledFloat32x16
+ OpRoundScaledFloat64x2
+ OpRoundScaledFloat64x4
+ OpRoundScaledFloat64x8
+ OpRoundScaledMaskedFloat32x4
+ OpRoundScaledMaskedFloat32x8
+ OpRoundScaledMaskedFloat32x16
+ OpRoundScaledMaskedFloat64x2
+ OpRoundScaledMaskedFloat64x4
+ OpRoundScaledMaskedFloat64x8
+ OpRoundScaledResidueFloat32x4
+ OpRoundScaledResidueFloat32x8
+ OpRoundScaledResidueFloat32x16
+ OpRoundScaledResidueFloat64x2
+ OpRoundScaledResidueFloat64x4
+ OpRoundScaledResidueFloat64x8
+ OpRoundScaledResidueMaskedFloat32x4
+ OpRoundScaledResidueMaskedFloat32x8
+ OpRoundScaledResidueMaskedFloat32x16
+ OpRoundScaledResidueMaskedFloat64x2
+ OpRoundScaledResidueMaskedFloat64x4
+ OpRoundScaledResidueMaskedFloat64x8
OpSet128Float32x8
OpSet128Float64x4
OpSet128Int8x32
OpShiftAllRightConcatUint64x2
OpShiftAllRightConcatUint64x4
OpShiftAllRightConcatUint64x8
- OpTruncWithPrecisionFloat32x4
- OpTruncWithPrecisionFloat32x8
- OpTruncWithPrecisionFloat32x16
- OpTruncWithPrecisionFloat64x2
- OpTruncWithPrecisionFloat64x4
- OpTruncWithPrecisionFloat64x8
- OpTruncWithPrecisionMaskedFloat32x4
- OpTruncWithPrecisionMaskedFloat32x8
- OpTruncWithPrecisionMaskedFloat32x16
- OpTruncWithPrecisionMaskedFloat64x2
- OpTruncWithPrecisionMaskedFloat64x4
- OpTruncWithPrecisionMaskedFloat64x8
+ OpTruncScaledFloat32x4
+ OpTruncScaledFloat32x8
+ OpTruncScaledFloat32x16
+ OpTruncScaledFloat64x2
+ OpTruncScaledFloat64x4
+ OpTruncScaledFloat64x8
+ OpTruncScaledMaskedFloat32x4
+ OpTruncScaledMaskedFloat32x8
+ OpTruncScaledMaskedFloat32x16
+ OpTruncScaledMaskedFloat64x2
+ OpTruncScaledMaskedFloat64x4
+ OpTruncScaledMaskedFloat64x8
+ OpTruncScaledResidueFloat32x4
+ OpTruncScaledResidueFloat32x8
+ OpTruncScaledResidueFloat32x16
+ OpTruncScaledResidueFloat64x2
+ OpTruncScaledResidueFloat64x4
+ OpTruncScaledResidueFloat64x8
+ OpTruncScaledResidueMaskedFloat32x4
+ OpTruncScaledResidueMaskedFloat32x8
+ OpTruncScaledResidueMaskedFloat32x16
+ OpTruncScaledResidueMaskedFloat64x2
+ OpTruncScaledResidueMaskedFloat64x4
+ OpTruncScaledResidueMaskedFloat64x8
)
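Each Op constant above indexes the opcodeTable entry of the same name below, and the argLen and commutative fields there mirror the declarations earlier in the diff. The sketch below is a rough illustration (using a hypothetical opMeta record, not the compiler's opInfo) of why the commutative flag is worth tracking: a pass can canonicalize the operand order of commutative ops such as MulInt32x4 so that equivalent expressions compare equal, while leaving non-commutative ops such as SubPairsInt32x4 alone.

package main

import (
	"fmt"
	"sort"
)

// opMeta is a hypothetical per-op metadata record, not the real opInfo.
type opMeta struct {
	name        string
	argLen      int
	commutative bool
}

// canonicalize sorts the arguments of a two-input commutative op so that
// structurally equal expressions end up with identical operand order.
func canonicalize(info opMeta, args []string) []string {
	if info.commutative && info.argLen == 2 {
		out := append([]string(nil), args...)
		sort.Strings(out)
		return out
	}
	return args
}

func main() {
	mul := opMeta{name: "MulInt32x4", argLen: 2, commutative: true}
	sub := opMeta{name: "SubPairsInt32x4", argLen: 2, commutative: false}
	fmt.Println(canonicalize(mul, []string{"y", "x"})) // [x y]
	fmt.Println(canonicalize(sub, []string{"y", "x"})) // [y x], order preserved
}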
var opcodeTable = [...]opInfo{
commutative: true,
generic: true,
},
+ {
+ name: "AddPairsFloat32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsFloat32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsFloat64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsFloat64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsInt32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsSaturatedInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsSaturatedInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsUint16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsUint32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddPairsUint32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedInt8x16",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedInt8x32",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedInt8x64",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedInt16x8",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedInt16x16",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedInt16x32",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedInt8x16",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedInt8x32",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedInt8x64",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedInt16x8",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedInt16x16",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedInt16x32",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedUint8x16",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedUint8x32",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedUint8x64",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedUint16x8",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedUint16x16",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedMaskedUint16x32",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedUint8x16",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedUint8x32",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedUint8x64",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedUint16x8",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedUint16x16",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "AddSaturatedUint16x32",
+ argLen: 2,
+ commutative: true,
+ generic: true,
+ },
{
name: "AddSubFloat32x4",
argLen: 2,
commutative: true,
generic: true,
},
- {
- name: "MulByPowOf2Float32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "MulByPowOf2Float32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "MulByPowOf2Float32x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "MulByPowOf2Float64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "MulByPowOf2Float64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "MulByPowOf2Float64x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "MulByPowOf2MaskedFloat32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "MulByPowOf2MaskedFloat32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "MulByPowOf2MaskedFloat32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "MulByPowOf2MaskedFloat64x2",
- argLen: 3,
- generic: true,
- },
- {
- name: "MulByPowOf2MaskedFloat64x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "MulByPowOf2MaskedFloat64x8",
- argLen: 3,
- generic: true,
- },
{
name: "MulEvenWidenInt32x4",
argLen: 2,
generic: true,
},
{
- name: "MulLowInt16x8",
+ name: "MulInt16x8",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt16x16",
+ name: "MulInt16x16",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt16x32",
+ name: "MulInt16x32",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt32x4",
+ name: "MulInt32x4",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt32x8",
+ name: "MulInt32x8",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt32x16",
+ name: "MulInt32x16",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt64x2",
+ name: "MulInt64x2",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt64x4",
+ name: "MulInt64x4",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulLowInt64x8",
+ name: "MulInt64x8",
argLen: 2,
commutative: true,
generic: true,
},
- {
- name: "MulLowMaskedInt16x8",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt16x16",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt16x32",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt32x4",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt32x8",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt32x16",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt64x2",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt64x4",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "MulLowMaskedInt64x8",
- argLen: 3,
- commutative: true,
- generic: true,
- },
{
name: "MulMaskedFloat32x4",
argLen: 3,
commutative: true,
generic: true,
},
+ {
+ name: "MulMaskedInt16x8",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt16x16",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt16x32",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt32x4",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt32x8",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt32x16",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt64x2",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt64x4",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
+ {
+ name: "MulMaskedInt64x8",
+ argLen: 3,
+ commutative: true,
+ generic: true,
+ },
{
name: "NotEqualFloat32x4",
argLen: 2,
argLen: 3,
generic: true,
},
- {
- name: "PairwiseAddFloat32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddFloat32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddFloat64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddFloat64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddInt16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddInt16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddInt32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddInt32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddUint16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddUint16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddUint32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseAddUint32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubFloat32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubFloat32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubFloat64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubFloat64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubInt16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubInt16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubInt32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubInt32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubUint16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubUint16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubUint32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairwiseSubUint32x8",
- argLen: 2,
- generic: true,
- },
{
name: "Permute2Float32x4",
argLen: 3,
generic: true,
},
{
- name: "SaturatedAddInt8x16",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddInt8x32",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddInt8x64",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddInt16x8",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddInt16x16",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddInt16x32",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddMaskedInt8x16",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddMaskedInt8x32",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddMaskedInt8x64",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddMaskedInt16x8",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddMaskedInt16x16",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddMaskedInt16x32",
- argLen: 3,
- commutative: true,
- generic: true,
- },
- {
- name: "SaturatedAddMaskedUint8x16",
- argLen: 3,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16",
+ argLen: 3,
+ generic: true,
},
{
- name: "SaturatedAddMaskedUint8x32",
- argLen: 3,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32",
+ argLen: 3,
+ generic: true,
},
{
- name: "SaturatedAddMaskedUint8x64",
- argLen: 3,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64",
+ argLen: 3,
+ generic: true,
},
{
- name: "SaturatedAddMaskedUint16x8",
- argLen: 3,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedPairDotProdUint8x16",
+ argLen: 2,
+ generic: true,
},
{
- name: "SaturatedAddMaskedUint16x16",
- argLen: 3,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedPairDotProdUint8x32",
+ argLen: 2,
+ generic: true,
},
{
- name: "SaturatedAddMaskedUint16x32",
- argLen: 3,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedPairDotProdUint8x64",
+ argLen: 2,
+ generic: true,
},
{
- name: "SaturatedAddUint8x16",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4",
+ argLen: 3,
+ generic: true,
},
{
- name: "SaturatedAddUint8x32",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8",
+ argLen: 3,
+ generic: true,
},
{
- name: "SaturatedAddUint8x64",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16",
+ argLen: 3,
+ generic: true,
},
{
- name: "SaturatedAddUint16x8",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4",
+ argLen: 4,
+ generic: true,
},
{
- name: "SaturatedAddUint16x16",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8",
+ argLen: 4,
+ generic: true,
},
{
- name: "SaturatedAddUint16x32",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16",
+ argLen: 4,
+ generic: true,
},
{
- name: "SaturatedPairwiseAddInt16x8",
+ name: "ScaleFloat32x4",
argLen: 2,
generic: true,
},
{
- name: "SaturatedPairwiseAddInt16x16",
+ name: "ScaleFloat32x8",
argLen: 2,
generic: true,
},
{
- name: "SaturatedPairwiseSubInt16x8",
+ name: "ScaleFloat32x16",
argLen: 2,
generic: true,
},
{
- name: "SaturatedPairwiseSubInt16x16",
+ name: "ScaleFloat64x2",
argLen: 2,
generic: true,
},
{
- name: "SaturatedSubInt8x16",
+ name: "ScaleFloat64x4",
argLen: 2,
generic: true,
},
{
- name: "SaturatedSubInt8x32",
+ name: "ScaleFloat64x8",
argLen: 2,
generic: true,
},
{
- name: "SaturatedSubInt8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubInt16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubInt16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubInt16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubMaskedInt8x16",
+ name: "ScaleMaskedFloat32x4",
argLen: 3,
generic: true,
},
{
- name: "SaturatedSubMaskedInt8x32",
+ name: "ScaleMaskedFloat32x8",
argLen: 3,
generic: true,
},
{
- name: "SaturatedSubMaskedInt8x64",
+ name: "ScaleMaskedFloat32x16",
argLen: 3,
generic: true,
},
{
- name: "SaturatedSubMaskedInt16x8",
+ name: "ScaleMaskedFloat64x2",
argLen: 3,
generic: true,
},
{
- name: "SaturatedSubMaskedInt16x16",
+ name: "ScaleMaskedFloat64x4",
argLen: 3,
generic: true,
},
{
- name: "SaturatedSubMaskedInt16x32",
+ name: "ScaleMaskedFloat64x8",
argLen: 3,
generic: true,
},
- {
- name: "SaturatedSubMaskedUint8x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedSubMaskedUint8x32",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedSubMaskedUint8x64",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedSubMaskedUint16x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedSubMaskedUint16x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedSubMaskedUint16x32",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedSubUint8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubUint8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubUint8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubUint16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubUint16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedSubUint16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdUint8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdUint8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdUint8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16",
- argLen: 4,
- generic: true,
- },
{
name: "ShiftAllLeftInt16x8",
argLen: 2,
argLen: 3,
generic: true,
},
+ {
+ name: "SubPairsFloat32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsFloat32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsFloat64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsFloat64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsInt32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsSaturatedInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsSaturatedInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsUint16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsUint32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubPairsUint32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedInt8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedInt8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedInt16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedInt8x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedInt8x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedInt8x64",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedInt16x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedInt16x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedInt16x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedUint8x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedUint8x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedUint8x64",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedUint16x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedUint16x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedMaskedUint16x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedUint8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedUint16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "SubSaturatedUint16x32",
+ argLen: 2,
+ generic: true,
+ },
{
name: "SubUint8x16",
argLen: 2,
generic: true,
},
{
- name: "CeilWithPrecisionFloat32x4",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "CeilWithPrecisionFloat32x8",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "CeilWithPrecisionFloat32x16",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "CeilWithPrecisionFloat64x2",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "CeilWithPrecisionFloat64x4",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "CeilWithPrecisionFloat64x8",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "CeilWithPrecisionMaskedFloat32x4",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "CeilWithPrecisionMaskedFloat32x8",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "CeilWithPrecisionMaskedFloat32x16",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "CeilWithPrecisionMaskedFloat64x2",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "CeilWithPrecisionMaskedFloat64x4",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "CeilWithPrecisionMaskedFloat64x8",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionFloat32x4",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionFloat32x8",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionFloat32x16",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionFloat64x2",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionFloat64x4",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionFloat64x8",
- auxType: auxInt8,
- argLen: 1,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionMaskedFloat32x4",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionMaskedFloat32x8",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionMaskedFloat32x16",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionMaskedFloat64x2",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionMaskedFloat64x4",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "DiffWithCeilWithPrecisionMaskedFloat64x8",
- auxType: auxInt8,
- argLen: 2,
- generic: true,
- },
- {
- name: "DiffWithFloorWithPrecisionFloat32x4",
+ name: "CeilScaledFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionFloat32x8",
+ name: "CeilScaledFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionFloat32x16",
+ name: "CeilScaledFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionFloat64x2",
+ name: "CeilScaledFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionFloat64x4",
+ name: "CeilScaledFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionFloat64x8",
+ name: "CeilScaledFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionMaskedFloat32x4",
+ name: "CeilScaledMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionMaskedFloat32x8",
+ name: "CeilScaledMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionMaskedFloat32x16",
+ name: "CeilScaledMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionMaskedFloat64x2",
+ name: "CeilScaledMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionMaskedFloat64x4",
+ name: "CeilScaledMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithFloorWithPrecisionMaskedFloat64x8",
+ name: "CeilScaledMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionFloat32x4",
+ name: "CeilScaledResidueFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionFloat32x8",
+ name: "CeilScaledResidueFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionFloat32x16",
+ name: "CeilScaledResidueFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionFloat64x2",
+ name: "CeilScaledResidueFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionFloat64x4",
+ name: "CeilScaledResidueFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionFloat64x8",
+ name: "CeilScaledResidueFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionMaskedFloat32x4",
+ name: "CeilScaledResidueMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionMaskedFloat32x8",
+ name: "CeilScaledResidueMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionMaskedFloat32x16",
+ name: "CeilScaledResidueMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionMaskedFloat64x2",
+ name: "CeilScaledResidueMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionMaskedFloat64x4",
+ name: "CeilScaledResidueMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithRoundWithPrecisionMaskedFloat64x8",
+ name: "CeilScaledResidueMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionFloat32x4",
+ name: "FloorScaledFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionFloat32x8",
+ name: "FloorScaledFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionFloat32x16",
+ name: "FloorScaledFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionFloat64x2",
+ name: "FloorScaledFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionFloat64x4",
+ name: "FloorScaledFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionFloat64x8",
+ name: "FloorScaledFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionMaskedFloat32x4",
+ name: "FloorScaledMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionMaskedFloat32x8",
+ name: "FloorScaledMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionMaskedFloat32x16",
+ name: "FloorScaledMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionMaskedFloat64x2",
+ name: "FloorScaledMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionMaskedFloat64x4",
+ name: "FloorScaledMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "DiffWithTruncWithPrecisionMaskedFloat64x8",
+ name: "FloorScaledMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "FloorWithPrecisionFloat32x4",
+ name: "FloorScaledResidueFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "FloorWithPrecisionFloat32x8",
+ name: "FloorScaledResidueFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "FloorWithPrecisionFloat32x16",
+ name: "FloorScaledResidueFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "FloorWithPrecisionFloat64x2",
+ name: "FloorScaledResidueFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "FloorWithPrecisionFloat64x4",
+ name: "FloorScaledResidueFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "FloorWithPrecisionFloat64x8",
+ name: "FloorScaledResidueFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "FloorWithPrecisionMaskedFloat32x4",
+ name: "FloorScaledResidueMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "FloorWithPrecisionMaskedFloat32x8",
+ name: "FloorScaledResidueMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "FloorWithPrecisionMaskedFloat32x16",
+ name: "FloorScaledResidueMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "FloorWithPrecisionMaskedFloat64x2",
+ name: "FloorScaledResidueMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "FloorWithPrecisionMaskedFloat64x4",
+ name: "FloorScaledResidueMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "FloorWithPrecisionMaskedFloat64x8",
+ name: "FloorScaledResidueMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundWithPrecisionFloat32x4",
+ name: "RoundScaledFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundWithPrecisionFloat32x8",
+ name: "RoundScaledFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundWithPrecisionFloat32x16",
+ name: "RoundScaledFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundWithPrecisionFloat64x2",
+ name: "RoundScaledFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundWithPrecisionFloat64x4",
+ name: "RoundScaledFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundWithPrecisionFloat64x8",
+ name: "RoundScaledFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundWithPrecisionMaskedFloat32x4",
+ name: "RoundScaledMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundWithPrecisionMaskedFloat32x8",
+ name: "RoundScaledMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundWithPrecisionMaskedFloat32x16",
+ name: "RoundScaledMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundWithPrecisionMaskedFloat64x2",
+ name: "RoundScaledMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundWithPrecisionMaskedFloat64x4",
+ name: "RoundScaledMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundWithPrecisionMaskedFloat64x8",
+ name: "RoundScaledMaskedFloat64x8",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueFloat32x4",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueFloat32x8",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueFloat32x16",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueFloat64x2",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueFloat64x4",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueFloat64x8",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueMaskedFloat32x4",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueMaskedFloat32x8",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueMaskedFloat32x16",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueMaskedFloat64x2",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueMaskedFloat64x4",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "RoundScaledResidueMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "TruncWithPrecisionFloat32x4",
+ name: "TruncScaledFloat32x4",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "TruncScaledFloat32x8",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "TruncScaledFloat32x16",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "TruncScaledFloat64x2",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "TruncScaledFloat64x4",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "TruncScaledFloat64x8",
+ auxType: auxInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "TruncScaledMaskedFloat32x4",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "TruncScaledMaskedFloat32x8",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "TruncScaledMaskedFloat32x16",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "TruncScaledMaskedFloat64x2",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "TruncScaledMaskedFloat64x4",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "TruncScaledMaskedFloat64x8",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "TruncScaledResidueFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "TruncWithPrecisionFloat32x8",
+ name: "TruncScaledResidueFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "TruncWithPrecisionFloat32x16",
+ name: "TruncScaledResidueFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "TruncWithPrecisionFloat64x2",
+ name: "TruncScaledResidueFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "TruncWithPrecisionFloat64x4",
+ name: "TruncScaledResidueFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "TruncWithPrecisionFloat64x8",
+ name: "TruncScaledResidueFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "TruncWithPrecisionMaskedFloat32x4",
+ name: "TruncScaledResidueMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "TruncWithPrecisionMaskedFloat32x8",
+ name: "TruncScaledResidueMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "TruncWithPrecisionMaskedFloat32x16",
+ name: "TruncScaledResidueMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "TruncWithPrecisionMaskedFloat64x2",
+ name: "TruncScaledResidueMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "TruncWithPrecisionMaskedFloat64x4",
+ name: "TruncScaledResidueMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "TruncWithPrecisionMaskedFloat64x8",
+ name: "TruncScaledResidueMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
return rewriteValueAMD64_OpAddMaskedUint8x32(v)
case OpAddMaskedUint8x64:
return rewriteValueAMD64_OpAddMaskedUint8x64(v)
+ case OpAddPairsFloat32x4:
+ v.Op = OpAMD64VHADDPS128
+ return true
+ case OpAddPairsFloat32x8:
+ v.Op = OpAMD64VHADDPS256
+ return true
+ case OpAddPairsFloat64x2:
+ v.Op = OpAMD64VHADDPD128
+ return true
+ case OpAddPairsFloat64x4:
+ v.Op = OpAMD64VHADDPD256
+ return true
+ case OpAddPairsInt16x16:
+ v.Op = OpAMD64VPHADDW256
+ return true
+ case OpAddPairsInt16x8:
+ v.Op = OpAMD64VPHADDW128
+ return true
+ case OpAddPairsInt32x4:
+ v.Op = OpAMD64VPHADDD128
+ return true
+ case OpAddPairsInt32x8:
+ v.Op = OpAMD64VPHADDD256
+ return true
+ case OpAddPairsSaturatedInt16x16:
+ v.Op = OpAMD64VPHADDSW256
+ return true
+ case OpAddPairsSaturatedInt16x8:
+ v.Op = OpAMD64VPHADDSW128
+ return true
+ case OpAddPairsUint16x16:
+ v.Op = OpAMD64VPHADDW256
+ return true
+ case OpAddPairsUint16x8:
+ v.Op = OpAMD64VPHADDW128
+ return true
+ case OpAddPairsUint32x4:
+ v.Op = OpAMD64VPHADDD128
+ return true
+ case OpAddPairsUint32x8:
+ v.Op = OpAMD64VPHADDD256
+ return true
case OpAddPtr:
v.Op = OpAMD64ADDQ
return true
+ case OpAddSaturatedInt16x16:
+ v.Op = OpAMD64VPADDSW256
+ return true
+ case OpAddSaturatedInt16x32:
+ v.Op = OpAMD64VPADDSW512
+ return true
+ case OpAddSaturatedInt16x8:
+ v.Op = OpAMD64VPADDSW128
+ return true
+ case OpAddSaturatedInt8x16:
+ v.Op = OpAMD64VPADDSB128
+ return true
+ case OpAddSaturatedInt8x32:
+ v.Op = OpAMD64VPADDSB256
+ return true
+ case OpAddSaturatedInt8x64:
+ v.Op = OpAMD64VPADDSB512
+ return true
+ case OpAddSaturatedMaskedInt16x16:
+ return rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v)
+ case OpAddSaturatedMaskedInt16x32:
+ return rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v)
+ case OpAddSaturatedMaskedInt16x8:
+ return rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v)
+ case OpAddSaturatedMaskedInt8x16:
+ return rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v)
+ case OpAddSaturatedMaskedInt8x32:
+ return rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v)
+ case OpAddSaturatedMaskedInt8x64:
+ return rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v)
+ case OpAddSaturatedMaskedUint16x16:
+ return rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v)
+ case OpAddSaturatedMaskedUint16x32:
+ return rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v)
+ case OpAddSaturatedMaskedUint16x8:
+ return rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v)
+ case OpAddSaturatedMaskedUint8x16:
+ return rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v)
+ case OpAddSaturatedMaskedUint8x32:
+ return rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v)
+ case OpAddSaturatedMaskedUint8x64:
+ return rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v)
+ case OpAddSaturatedUint16x16:
+ v.Op = OpAMD64VPADDSW256
+ return true
+ case OpAddSaturatedUint16x32:
+ v.Op = OpAMD64VPADDSW512
+ return true
+ case OpAddSaturatedUint16x8:
+ v.Op = OpAMD64VPADDSW128
+ return true
+ case OpAddSaturatedUint8x16:
+ v.Op = OpAMD64VPADDSB128
+ return true
+ case OpAddSaturatedUint8x32:
+ v.Op = OpAMD64VPADDSB256
+ return true
+ case OpAddSaturatedUint8x64:
+ v.Op = OpAMD64VPADDSB512
+ return true
case OpAddSubFloat32x4:
v.Op = OpAMD64VADDSUBPS128
return true
return rewriteValueAMD64_OpCeilFloat64x2(v)
case OpCeilFloat64x4:
return rewriteValueAMD64_OpCeilFloat64x4(v)
- case OpCeilWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v)
- case OpCeilWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v)
- case OpCeilWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v)
- case OpCeilWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v)
- case OpCeilWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v)
- case OpCeilWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v)
- case OpCeilWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x16(v)
- case OpCeilWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x4(v)
- case OpCeilWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x8(v)
- case OpCeilWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x2(v)
- case OpCeilWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x4(v)
- case OpCeilWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x8(v)
+ case OpCeilScaledFloat32x16:
+ return rewriteValueAMD64_OpCeilScaledFloat32x16(v)
+ case OpCeilScaledFloat32x4:
+ return rewriteValueAMD64_OpCeilScaledFloat32x4(v)
+ case OpCeilScaledFloat32x8:
+ return rewriteValueAMD64_OpCeilScaledFloat32x8(v)
+ case OpCeilScaledFloat64x2:
+ return rewriteValueAMD64_OpCeilScaledFloat64x2(v)
+ case OpCeilScaledFloat64x4:
+ return rewriteValueAMD64_OpCeilScaledFloat64x4(v)
+ case OpCeilScaledFloat64x8:
+ return rewriteValueAMD64_OpCeilScaledFloat64x8(v)
+ case OpCeilScaledMaskedFloat32x16:
+ return rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v)
+ case OpCeilScaledMaskedFloat32x4:
+ return rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v)
+ case OpCeilScaledMaskedFloat32x8:
+ return rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v)
+ case OpCeilScaledMaskedFloat64x2:
+ return rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v)
+ case OpCeilScaledMaskedFloat64x4:
+ return rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v)
+ case OpCeilScaledMaskedFloat64x8:
+ return rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v)
+ case OpCeilScaledResidueFloat32x16:
+ return rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v)
+ case OpCeilScaledResidueFloat32x4:
+ return rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v)
+ case OpCeilScaledResidueFloat32x8:
+ return rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v)
+ case OpCeilScaledResidueFloat64x2:
+ return rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v)
+ case OpCeilScaledResidueFloat64x4:
+ return rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v)
+ case OpCeilScaledResidueFloat64x8:
+ return rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v)
+ case OpCeilScaledResidueMaskedFloat32x16:
+ return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v)
+ case OpCeilScaledResidueMaskedFloat32x4:
+ return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v)
+ case OpCeilScaledResidueMaskedFloat32x8:
+ return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v)
+ case OpCeilScaledResidueMaskedFloat64x2:
+ return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v)
+ case OpCeilScaledResidueMaskedFloat64x4:
+ return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v)
+ case OpCeilScaledResidueMaskedFloat64x8:
+ return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v)
case OpClosureCall:
v.Op = OpAMD64CALLclosure
return true
case OpCvtBoolToUint8:
v.Op = OpCopy
return true
- case OpDiffWithCeilWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v)
- case OpDiffWithCeilWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v)
- case OpDiffWithCeilWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v)
- case OpDiffWithCeilWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v)
- case OpDiffWithCeilWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v)
- case OpDiffWithCeilWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v)
- case OpDiffWithCeilWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x16(v)
- case OpDiffWithCeilWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x4(v)
- case OpDiffWithCeilWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x8(v)
- case OpDiffWithCeilWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x2(v)
- case OpDiffWithCeilWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x4(v)
- case OpDiffWithCeilWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x8(v)
- case OpDiffWithFloorWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v)
- case OpDiffWithFloorWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v)
- case OpDiffWithFloorWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v)
- case OpDiffWithFloorWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v)
- case OpDiffWithFloorWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v)
- case OpDiffWithFloorWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v)
- case OpDiffWithFloorWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x16(v)
- case OpDiffWithFloorWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x4(v)
- case OpDiffWithFloorWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x8(v)
- case OpDiffWithFloorWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x2(v)
- case OpDiffWithFloorWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x4(v)
- case OpDiffWithFloorWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x8(v)
- case OpDiffWithRoundWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v)
- case OpDiffWithRoundWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v)
- case OpDiffWithRoundWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v)
- case OpDiffWithRoundWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v)
- case OpDiffWithRoundWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v)
- case OpDiffWithRoundWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v)
- case OpDiffWithRoundWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x16(v)
- case OpDiffWithRoundWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x4(v)
- case OpDiffWithRoundWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x8(v)
- case OpDiffWithRoundWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x2(v)
- case OpDiffWithRoundWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x4(v)
- case OpDiffWithRoundWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x8(v)
- case OpDiffWithTruncWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v)
- case OpDiffWithTruncWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v)
- case OpDiffWithTruncWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v)
- case OpDiffWithTruncWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v)
- case OpDiffWithTruncWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v)
- case OpDiffWithTruncWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v)
- case OpDiffWithTruncWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x16(v)
- case OpDiffWithTruncWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x4(v)
- case OpDiffWithTruncWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x8(v)
- case OpDiffWithTruncWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x2(v)
- case OpDiffWithTruncWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x4(v)
- case OpDiffWithTruncWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x8(v)
case OpDiv128u:
v.Op = OpAMD64DIVQU2
return true
return rewriteValueAMD64_OpFloorFloat64x2(v)
case OpFloorFloat64x4:
return rewriteValueAMD64_OpFloorFloat64x4(v)
- case OpFloorWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v)
- case OpFloorWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v)
- case OpFloorWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v)
- case OpFloorWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v)
- case OpFloorWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v)
- case OpFloorWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v)
- case OpFloorWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x16(v)
- case OpFloorWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x4(v)
- case OpFloorWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x8(v)
- case OpFloorWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x2(v)
- case OpFloorWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x4(v)
- case OpFloorWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x8(v)
+ case OpFloorScaledFloat32x16:
+ return rewriteValueAMD64_OpFloorScaledFloat32x16(v)
+ case OpFloorScaledFloat32x4:
+ return rewriteValueAMD64_OpFloorScaledFloat32x4(v)
+ case OpFloorScaledFloat32x8:
+ return rewriteValueAMD64_OpFloorScaledFloat32x8(v)
+ case OpFloorScaledFloat64x2:
+ return rewriteValueAMD64_OpFloorScaledFloat64x2(v)
+ case OpFloorScaledFloat64x4:
+ return rewriteValueAMD64_OpFloorScaledFloat64x4(v)
+ case OpFloorScaledFloat64x8:
+ return rewriteValueAMD64_OpFloorScaledFloat64x8(v)
+ case OpFloorScaledMaskedFloat32x16:
+ return rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v)
+ case OpFloorScaledMaskedFloat32x4:
+ return rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v)
+ case OpFloorScaledMaskedFloat32x8:
+ return rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v)
+ case OpFloorScaledMaskedFloat64x2:
+ return rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v)
+ case OpFloorScaledMaskedFloat64x4:
+ return rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v)
+ case OpFloorScaledMaskedFloat64x8:
+ return rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v)
+ case OpFloorScaledResidueFloat32x16:
+ return rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v)
+ case OpFloorScaledResidueFloat32x4:
+ return rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v)
+ case OpFloorScaledResidueFloat32x8:
+ return rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v)
+ case OpFloorScaledResidueFloat64x2:
+ return rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v)
+ case OpFloorScaledResidueFloat64x4:
+ return rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v)
+ case OpFloorScaledResidueFloat64x8:
+ return rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v)
+ case OpFloorScaledResidueMaskedFloat32x16:
+ return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v)
+ case OpFloorScaledResidueMaskedFloat32x4:
+ return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v)
+ case OpFloorScaledResidueMaskedFloat32x8:
+ return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v)
+ case OpFloorScaledResidueMaskedFloat64x2:
+ return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v)
+ case OpFloorScaledResidueMaskedFloat64x4:
+ return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v)
+ case OpFloorScaledResidueMaskedFloat64x8:
+ return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v)
case OpFusedMultiplyAddFloat32x16:
v.Op = OpAMD64VFMADD213PS512
return true
case OpMul8:
v.Op = OpAMD64MULL
return true
- case OpMulByPowOf2Float32x16:
- v.Op = OpAMD64VSCALEFPS512
- return true
- case OpMulByPowOf2Float32x4:
- v.Op = OpAMD64VSCALEFPS128
- return true
- case OpMulByPowOf2Float32x8:
- v.Op = OpAMD64VSCALEFPS256
- return true
- case OpMulByPowOf2Float64x2:
- v.Op = OpAMD64VSCALEFPD128
- return true
- case OpMulByPowOf2Float64x4:
- v.Op = OpAMD64VSCALEFPD256
- return true
- case OpMulByPowOf2Float64x8:
- v.Op = OpAMD64VSCALEFPD512
- return true
- case OpMulByPowOf2MaskedFloat32x16:
- return rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x16(v)
- case OpMulByPowOf2MaskedFloat32x4:
- return rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x4(v)
- case OpMulByPowOf2MaskedFloat32x8:
- return rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x8(v)
- case OpMulByPowOf2MaskedFloat64x2:
- return rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x2(v)
- case OpMulByPowOf2MaskedFloat64x4:
- return rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x4(v)
- case OpMulByPowOf2MaskedFloat64x8:
- return rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x8(v)
case OpMulEvenWidenInt32x4:
v.Op = OpAMD64VPMULDQ128
return true
case OpMulHighUint16x8:
v.Op = OpAMD64VPMULHUW128
return true
- case OpMulLowInt16x16:
+ case OpMulInt16x16:
v.Op = OpAMD64VPMULLW256
return true
- case OpMulLowInt16x32:
+ case OpMulInt16x32:
v.Op = OpAMD64VPMULLW512
return true
- case OpMulLowInt16x8:
+ case OpMulInt16x8:
v.Op = OpAMD64VPMULLW128
return true
- case OpMulLowInt32x16:
+ case OpMulInt32x16:
v.Op = OpAMD64VPMULLD512
return true
- case OpMulLowInt32x4:
+ case OpMulInt32x4:
v.Op = OpAMD64VPMULLD128
return true
- case OpMulLowInt32x8:
+ case OpMulInt32x8:
v.Op = OpAMD64VPMULLD256
return true
- case OpMulLowInt64x2:
+ case OpMulInt64x2:
v.Op = OpAMD64VPMULLQ128
return true
- case OpMulLowInt64x4:
+ case OpMulInt64x4:
v.Op = OpAMD64VPMULLQ256
return true
- case OpMulLowInt64x8:
+ case OpMulInt64x8:
v.Op = OpAMD64VPMULLQ512
return true
- case OpMulLowMaskedInt16x16:
- return rewriteValueAMD64_OpMulLowMaskedInt16x16(v)
- case OpMulLowMaskedInt16x32:
- return rewriteValueAMD64_OpMulLowMaskedInt16x32(v)
- case OpMulLowMaskedInt16x8:
- return rewriteValueAMD64_OpMulLowMaskedInt16x8(v)
- case OpMulLowMaskedInt32x16:
- return rewriteValueAMD64_OpMulLowMaskedInt32x16(v)
- case OpMulLowMaskedInt32x4:
- return rewriteValueAMD64_OpMulLowMaskedInt32x4(v)
- case OpMulLowMaskedInt32x8:
- return rewriteValueAMD64_OpMulLowMaskedInt32x8(v)
- case OpMulLowMaskedInt64x2:
- return rewriteValueAMD64_OpMulLowMaskedInt64x2(v)
- case OpMulLowMaskedInt64x4:
- return rewriteValueAMD64_OpMulLowMaskedInt64x4(v)
- case OpMulLowMaskedInt64x8:
- return rewriteValueAMD64_OpMulLowMaskedInt64x8(v)
case OpMulMaskedFloat32x16:
return rewriteValueAMD64_OpMulMaskedFloat32x16(v)
case OpMulMaskedFloat32x4:
return rewriteValueAMD64_OpMulMaskedFloat64x4(v)
case OpMulMaskedFloat64x8:
return rewriteValueAMD64_OpMulMaskedFloat64x8(v)
+ case OpMulMaskedInt16x16:
+ return rewriteValueAMD64_OpMulMaskedInt16x16(v)
+ case OpMulMaskedInt16x32:
+ return rewriteValueAMD64_OpMulMaskedInt16x32(v)
+ case OpMulMaskedInt16x8:
+ return rewriteValueAMD64_OpMulMaskedInt16x8(v)
+ case OpMulMaskedInt32x16:
+ return rewriteValueAMD64_OpMulMaskedInt32x16(v)
+ case OpMulMaskedInt32x4:
+ return rewriteValueAMD64_OpMulMaskedInt32x4(v)
+ case OpMulMaskedInt32x8:
+ return rewriteValueAMD64_OpMulMaskedInt32x8(v)
+ case OpMulMaskedInt64x2:
+ return rewriteValueAMD64_OpMulMaskedInt64x2(v)
+ case OpMulMaskedInt64x4:
+ return rewriteValueAMD64_OpMulMaskedInt64x4(v)
+ case OpMulMaskedInt64x8:
+ return rewriteValueAMD64_OpMulMaskedInt64x8(v)
case OpNeg16:
v.Op = OpAMD64NEGL
return true
return rewriteValueAMD64_OpPairDotProdMaskedInt16x32(v)
case OpPairDotProdMaskedInt16x8:
return rewriteValueAMD64_OpPairDotProdMaskedInt16x8(v)
- case OpPairwiseAddFloat32x4:
- v.Op = OpAMD64VHADDPS128
- return true
- case OpPairwiseAddFloat32x8:
- v.Op = OpAMD64VHADDPS256
- return true
- case OpPairwiseAddFloat64x2:
- v.Op = OpAMD64VHADDPD128
- return true
- case OpPairwiseAddFloat64x4:
- v.Op = OpAMD64VHADDPD256
- return true
- case OpPairwiseAddInt16x16:
- v.Op = OpAMD64VPHADDW256
- return true
- case OpPairwiseAddInt16x8:
- v.Op = OpAMD64VPHADDW128
- return true
- case OpPairwiseAddInt32x4:
- v.Op = OpAMD64VPHADDD128
- return true
- case OpPairwiseAddInt32x8:
- v.Op = OpAMD64VPHADDD256
- return true
- case OpPairwiseAddUint16x16:
- v.Op = OpAMD64VPHADDW256
- return true
- case OpPairwiseAddUint16x8:
- v.Op = OpAMD64VPHADDW128
- return true
- case OpPairwiseAddUint32x4:
- v.Op = OpAMD64VPHADDD128
- return true
- case OpPairwiseAddUint32x8:
- v.Op = OpAMD64VPHADDD256
- return true
- case OpPairwiseSubFloat32x4:
- v.Op = OpAMD64VHSUBPS128
- return true
- case OpPairwiseSubFloat32x8:
- v.Op = OpAMD64VHSUBPS256
- return true
- case OpPairwiseSubFloat64x2:
- v.Op = OpAMD64VHSUBPD128
- return true
- case OpPairwiseSubFloat64x4:
- v.Op = OpAMD64VHSUBPD256
- return true
- case OpPairwiseSubInt16x16:
- v.Op = OpAMD64VPHSUBW256
- return true
- case OpPairwiseSubInt16x8:
- v.Op = OpAMD64VPHSUBW128
- return true
- case OpPairwiseSubInt32x4:
- v.Op = OpAMD64VPHSUBD128
- return true
- case OpPairwiseSubInt32x8:
- v.Op = OpAMD64VPHSUBD256
- return true
- case OpPairwiseSubUint16x16:
- v.Op = OpAMD64VPHSUBW256
- return true
- case OpPairwiseSubUint16x8:
- v.Op = OpAMD64VPHSUBW128
- return true
- case OpPairwiseSubUint32x4:
- v.Op = OpAMD64VPHSUBD128
- return true
- case OpPairwiseSubUint32x8:
- v.Op = OpAMD64VPHSUBD256
- return true
case OpPanicBounds:
return rewriteValueAMD64_OpPanicBounds(v)
case OpPermute2Float32x16:
return rewriteValueAMD64_OpRoundFloat64x2(v)
case OpRoundFloat64x4:
return rewriteValueAMD64_OpRoundFloat64x4(v)
+ case OpRoundScaledFloat32x16:
+ return rewriteValueAMD64_OpRoundScaledFloat32x16(v)
+ case OpRoundScaledFloat32x4:
+ return rewriteValueAMD64_OpRoundScaledFloat32x4(v)
+ case OpRoundScaledFloat32x8:
+ return rewriteValueAMD64_OpRoundScaledFloat32x8(v)
+ case OpRoundScaledFloat64x2:
+ return rewriteValueAMD64_OpRoundScaledFloat64x2(v)
+ case OpRoundScaledFloat64x4:
+ return rewriteValueAMD64_OpRoundScaledFloat64x4(v)
+ case OpRoundScaledFloat64x8:
+ return rewriteValueAMD64_OpRoundScaledFloat64x8(v)
+ case OpRoundScaledMaskedFloat32x16:
+ return rewriteValueAMD64_OpRoundScaledMaskedFloat32x16(v)
+ case OpRoundScaledMaskedFloat32x4:
+ return rewriteValueAMD64_OpRoundScaledMaskedFloat32x4(v)
+ case OpRoundScaledMaskedFloat32x8:
+ return rewriteValueAMD64_OpRoundScaledMaskedFloat32x8(v)
+ case OpRoundScaledMaskedFloat64x2:
+ return rewriteValueAMD64_OpRoundScaledMaskedFloat64x2(v)
+ case OpRoundScaledMaskedFloat64x4:
+ return rewriteValueAMD64_OpRoundScaledMaskedFloat64x4(v)
+ case OpRoundScaledMaskedFloat64x8:
+ return rewriteValueAMD64_OpRoundScaledMaskedFloat64x8(v)
+ case OpRoundScaledResidueFloat32x16:
+ return rewriteValueAMD64_OpRoundScaledResidueFloat32x16(v)
+ case OpRoundScaledResidueFloat32x4:
+ return rewriteValueAMD64_OpRoundScaledResidueFloat32x4(v)
+ case OpRoundScaledResidueFloat32x8:
+ return rewriteValueAMD64_OpRoundScaledResidueFloat32x8(v)
+ case OpRoundScaledResidueFloat64x2:
+ return rewriteValueAMD64_OpRoundScaledResidueFloat64x2(v)
+ case OpRoundScaledResidueFloat64x4:
+ return rewriteValueAMD64_OpRoundScaledResidueFloat64x4(v)
+ case OpRoundScaledResidueFloat64x8:
+ return rewriteValueAMD64_OpRoundScaledResidueFloat64x8(v)
+ case OpRoundScaledResidueMaskedFloat32x16:
+ return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x16(v)
+ case OpRoundScaledResidueMaskedFloat32x4:
+ return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x4(v)
+ case OpRoundScaledResidueMaskedFloat32x8:
+ return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x8(v)
+ case OpRoundScaledResidueMaskedFloat64x2:
+ return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x2(v)
+ case OpRoundScaledResidueMaskedFloat64x4:
+ return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x4(v)
+ case OpRoundScaledResidueMaskedFloat64x8:
+ return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x8(v)
case OpRoundToEven:
return rewriteValueAMD64_OpRoundToEven(v)
- case OpRoundWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v)
- case OpRoundWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v)
- case OpRoundWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v)
- case OpRoundWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v)
- case OpRoundWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v)
- case OpRoundWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v)
- case OpRoundWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x16(v)
- case OpRoundWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x4(v)
- case OpRoundWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x8(v)
- case OpRoundWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x2(v)
- case OpRoundWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x4(v)
- case OpRoundWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x8(v)
case OpRsh16Ux16:
return rewriteValueAMD64_OpRsh16Ux16(v)
case OpRsh16Ux32:
return rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x4(v)
case OpSaturatedAddDotProdMaskedInt32x8:
return rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x8(v)
- case OpSaturatedAddInt16x16:
- v.Op = OpAMD64VPADDSW256
- return true
- case OpSaturatedAddInt16x32:
- v.Op = OpAMD64VPADDSW512
- return true
- case OpSaturatedAddInt16x8:
- v.Op = OpAMD64VPADDSW128
- return true
- case OpSaturatedAddInt8x16:
- v.Op = OpAMD64VPADDSB128
- return true
- case OpSaturatedAddInt8x32:
- v.Op = OpAMD64VPADDSB256
- return true
- case OpSaturatedAddInt8x64:
- v.Op = OpAMD64VPADDSB512
- return true
- case OpSaturatedAddMaskedInt16x16:
- return rewriteValueAMD64_OpSaturatedAddMaskedInt16x16(v)
- case OpSaturatedAddMaskedInt16x32:
- return rewriteValueAMD64_OpSaturatedAddMaskedInt16x32(v)
- case OpSaturatedAddMaskedInt16x8:
- return rewriteValueAMD64_OpSaturatedAddMaskedInt16x8(v)
- case OpSaturatedAddMaskedInt8x16:
- return rewriteValueAMD64_OpSaturatedAddMaskedInt8x16(v)
- case OpSaturatedAddMaskedInt8x32:
- return rewriteValueAMD64_OpSaturatedAddMaskedInt8x32(v)
- case OpSaturatedAddMaskedInt8x64:
- return rewriteValueAMD64_OpSaturatedAddMaskedInt8x64(v)
- case OpSaturatedAddMaskedUint16x16:
- return rewriteValueAMD64_OpSaturatedAddMaskedUint16x16(v)
- case OpSaturatedAddMaskedUint16x32:
- return rewriteValueAMD64_OpSaturatedAddMaskedUint16x32(v)
- case OpSaturatedAddMaskedUint16x8:
- return rewriteValueAMD64_OpSaturatedAddMaskedUint16x8(v)
- case OpSaturatedAddMaskedUint8x16:
- return rewriteValueAMD64_OpSaturatedAddMaskedUint8x16(v)
- case OpSaturatedAddMaskedUint8x32:
- return rewriteValueAMD64_OpSaturatedAddMaskedUint8x32(v)
- case OpSaturatedAddMaskedUint8x64:
- return rewriteValueAMD64_OpSaturatedAddMaskedUint8x64(v)
- case OpSaturatedAddUint16x16:
- v.Op = OpAMD64VPADDSW256
- return true
- case OpSaturatedAddUint16x32:
- v.Op = OpAMD64VPADDSW512
- return true
- case OpSaturatedAddUint16x8:
- v.Op = OpAMD64VPADDSW128
- return true
- case OpSaturatedAddUint8x16:
- v.Op = OpAMD64VPADDSB128
- return true
- case OpSaturatedAddUint8x32:
- v.Op = OpAMD64VPADDSB256
- return true
- case OpSaturatedAddUint8x64:
- v.Op = OpAMD64VPADDSB512
- return true
- case OpSaturatedPairwiseAddInt16x16:
- v.Op = OpAMD64VPHADDSW256
- return true
- case OpSaturatedPairwiseAddInt16x8:
- v.Op = OpAMD64VPHADDSW128
- return true
- case OpSaturatedPairwiseSubInt16x16:
- v.Op = OpAMD64VPHSUBSW256
- return true
- case OpSaturatedPairwiseSubInt16x8:
- v.Op = OpAMD64VPHSUBSW128
- return true
- case OpSaturatedSubInt16x16:
- v.Op = OpAMD64VPSUBSW256
- return true
- case OpSaturatedSubInt16x32:
- v.Op = OpAMD64VPSUBSW512
- return true
- case OpSaturatedSubInt16x8:
- v.Op = OpAMD64VPSUBSW128
- return true
- case OpSaturatedSubInt8x16:
- v.Op = OpAMD64VPSUBSB128
- return true
- case OpSaturatedSubInt8x32:
- v.Op = OpAMD64VPSUBSB256
- return true
- case OpSaturatedSubInt8x64:
- v.Op = OpAMD64VPSUBSB512
- return true
- case OpSaturatedSubMaskedInt16x16:
- return rewriteValueAMD64_OpSaturatedSubMaskedInt16x16(v)
- case OpSaturatedSubMaskedInt16x32:
- return rewriteValueAMD64_OpSaturatedSubMaskedInt16x32(v)
- case OpSaturatedSubMaskedInt16x8:
- return rewriteValueAMD64_OpSaturatedSubMaskedInt16x8(v)
- case OpSaturatedSubMaskedInt8x16:
- return rewriteValueAMD64_OpSaturatedSubMaskedInt8x16(v)
- case OpSaturatedSubMaskedInt8x32:
- return rewriteValueAMD64_OpSaturatedSubMaskedInt8x32(v)
- case OpSaturatedSubMaskedInt8x64:
- return rewriteValueAMD64_OpSaturatedSubMaskedInt8x64(v)
- case OpSaturatedSubMaskedUint16x16:
- return rewriteValueAMD64_OpSaturatedSubMaskedUint16x16(v)
- case OpSaturatedSubMaskedUint16x32:
- return rewriteValueAMD64_OpSaturatedSubMaskedUint16x32(v)
- case OpSaturatedSubMaskedUint16x8:
- return rewriteValueAMD64_OpSaturatedSubMaskedUint16x8(v)
- case OpSaturatedSubMaskedUint8x16:
- return rewriteValueAMD64_OpSaturatedSubMaskedUint8x16(v)
- case OpSaturatedSubMaskedUint8x32:
- return rewriteValueAMD64_OpSaturatedSubMaskedUint8x32(v)
- case OpSaturatedSubMaskedUint8x64:
- return rewriteValueAMD64_OpSaturatedSubMaskedUint8x64(v)
- case OpSaturatedSubUint16x16:
- v.Op = OpAMD64VPSUBSW256
- return true
- case OpSaturatedSubUint16x32:
- v.Op = OpAMD64VPSUBSW512
- return true
- case OpSaturatedSubUint16x8:
- v.Op = OpAMD64VPSUBSW128
- return true
- case OpSaturatedSubUint8x16:
- v.Op = OpAMD64VPSUBSB128
- return true
- case OpSaturatedSubUint8x32:
- v.Op = OpAMD64VPSUBSB256
- return true
- case OpSaturatedSubUint8x64:
- v.Op = OpAMD64VPSUBSB512
- return true
case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16:
return rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16(v)
case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32:
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
+ case OpScaleFloat32x16:
+ v.Op = OpAMD64VSCALEFPS512
+ return true
+ case OpScaleFloat32x4:
+ v.Op = OpAMD64VSCALEFPS128
+ return true
+ case OpScaleFloat32x8:
+ v.Op = OpAMD64VSCALEFPS256
+ return true
+ case OpScaleFloat64x2:
+ v.Op = OpAMD64VSCALEFPD128
+ return true
+ case OpScaleFloat64x4:
+ v.Op = OpAMD64VSCALEFPD256
+ return true
+ case OpScaleFloat64x8:
+ v.Op = OpAMD64VSCALEFPD512
+ return true
+ case OpScaleMaskedFloat32x16:
+ return rewriteValueAMD64_OpScaleMaskedFloat32x16(v)
+ case OpScaleMaskedFloat32x4:
+ return rewriteValueAMD64_OpScaleMaskedFloat32x4(v)
+ case OpScaleMaskedFloat32x8:
+ return rewriteValueAMD64_OpScaleMaskedFloat32x8(v)
+ case OpScaleMaskedFloat64x2:
+ return rewriteValueAMD64_OpScaleMaskedFloat64x2(v)
+ case OpScaleMaskedFloat64x4:
+ return rewriteValueAMD64_OpScaleMaskedFloat64x4(v)
+ case OpScaleMaskedFloat64x8:
+ return rewriteValueAMD64_OpScaleMaskedFloat64x8(v)
case OpSelect0:
return rewriteValueAMD64_OpSelect0(v)
case OpSelect1:
return rewriteValueAMD64_OpSubMaskedUint8x32(v)
case OpSubMaskedUint8x64:
return rewriteValueAMD64_OpSubMaskedUint8x64(v)
+ case OpSubPairsFloat32x4:
+ v.Op = OpAMD64VHSUBPS128
+ return true
+ case OpSubPairsFloat32x8:
+ v.Op = OpAMD64VHSUBPS256
+ return true
+ case OpSubPairsFloat64x2:
+ v.Op = OpAMD64VHSUBPD128
+ return true
+ case OpSubPairsFloat64x4:
+ v.Op = OpAMD64VHSUBPD256
+ return true
+ case OpSubPairsInt16x16:
+ v.Op = OpAMD64VPHSUBW256
+ return true
+ case OpSubPairsInt16x8:
+ v.Op = OpAMD64VPHSUBW128
+ return true
+ case OpSubPairsInt32x4:
+ v.Op = OpAMD64VPHSUBD128
+ return true
+ case OpSubPairsInt32x8:
+ v.Op = OpAMD64VPHSUBD256
+ return true
+ case OpSubPairsSaturatedInt16x16:
+ v.Op = OpAMD64VPHSUBSW256
+ return true
+ case OpSubPairsSaturatedInt16x8:
+ v.Op = OpAMD64VPHSUBSW128
+ return true
+ case OpSubPairsUint16x16:
+ v.Op = OpAMD64VPHSUBW256
+ return true
+ case OpSubPairsUint16x8:
+ v.Op = OpAMD64VPHSUBW128
+ return true
+ case OpSubPairsUint32x4:
+ v.Op = OpAMD64VPHSUBD128
+ return true
+ case OpSubPairsUint32x8:
+ v.Op = OpAMD64VPHSUBD256
+ return true
case OpSubPtr:
v.Op = OpAMD64SUBQ
return true
+ case OpSubSaturatedInt16x16:
+ v.Op = OpAMD64VPSUBSW256
+ return true
+ case OpSubSaturatedInt16x32:
+ v.Op = OpAMD64VPSUBSW512
+ return true
+ case OpSubSaturatedInt16x8:
+ v.Op = OpAMD64VPSUBSW128
+ return true
+ case OpSubSaturatedInt8x16:
+ v.Op = OpAMD64VPSUBSB128
+ return true
+ case OpSubSaturatedInt8x32:
+ v.Op = OpAMD64VPSUBSB256
+ return true
+ case OpSubSaturatedInt8x64:
+ v.Op = OpAMD64VPSUBSB512
+ return true
+ case OpSubSaturatedMaskedInt16x16:
+ return rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v)
+ case OpSubSaturatedMaskedInt16x32:
+ return rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v)
+ case OpSubSaturatedMaskedInt16x8:
+ return rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v)
+ case OpSubSaturatedMaskedInt8x16:
+ return rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v)
+ case OpSubSaturatedMaskedInt8x32:
+ return rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v)
+ case OpSubSaturatedMaskedInt8x64:
+ return rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v)
+ case OpSubSaturatedMaskedUint16x16:
+ return rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v)
+ case OpSubSaturatedMaskedUint16x32:
+ return rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v)
+ case OpSubSaturatedMaskedUint16x8:
+ return rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v)
+ case OpSubSaturatedMaskedUint8x16:
+ return rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v)
+ case OpSubSaturatedMaskedUint8x32:
+ return rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v)
+ case OpSubSaturatedMaskedUint8x64:
+ return rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v)
+ case OpSubSaturatedUint16x16:
+ v.Op = OpAMD64VPSUBSW256
+ return true
+ case OpSubSaturatedUint16x32:
+ v.Op = OpAMD64VPSUBSW512
+ return true
+ case OpSubSaturatedUint16x8:
+ v.Op = OpAMD64VPSUBSW128
+ return true
+ case OpSubSaturatedUint8x16:
+ v.Op = OpAMD64VPSUBSB128
+ return true
+ case OpSubSaturatedUint8x32:
+ v.Op = OpAMD64VPSUBSB256
+ return true
+ case OpSubSaturatedUint8x64:
+ v.Op = OpAMD64VPSUBSB512
+ return true
case OpSubUint16x16:
v.Op = OpAMD64VPSUBW256
return true
return rewriteValueAMD64_OpTruncFloat64x2(v)
case OpTruncFloat64x4:
return rewriteValueAMD64_OpTruncFloat64x4(v)
- case OpTruncWithPrecisionFloat32x16:
- return rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v)
- case OpTruncWithPrecisionFloat32x4:
- return rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v)
- case OpTruncWithPrecisionFloat32x8:
- return rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v)
- case OpTruncWithPrecisionFloat64x2:
- return rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v)
- case OpTruncWithPrecisionFloat64x4:
- return rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v)
- case OpTruncWithPrecisionFloat64x8:
- return rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v)
- case OpTruncWithPrecisionMaskedFloat32x16:
- return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x16(v)
- case OpTruncWithPrecisionMaskedFloat32x4:
- return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x4(v)
- case OpTruncWithPrecisionMaskedFloat32x8:
- return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x8(v)
- case OpTruncWithPrecisionMaskedFloat64x2:
- return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x2(v)
- case OpTruncWithPrecisionMaskedFloat64x4:
- return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x4(v)
- case OpTruncWithPrecisionMaskedFloat64x8:
- return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x8(v)
+ case OpTruncScaledFloat32x16:
+ return rewriteValueAMD64_OpTruncScaledFloat32x16(v)
+ case OpTruncScaledFloat32x4:
+ return rewriteValueAMD64_OpTruncScaledFloat32x4(v)
+ case OpTruncScaledFloat32x8:
+ return rewriteValueAMD64_OpTruncScaledFloat32x8(v)
+ case OpTruncScaledFloat64x2:
+ return rewriteValueAMD64_OpTruncScaledFloat64x2(v)
+ case OpTruncScaledFloat64x4:
+ return rewriteValueAMD64_OpTruncScaledFloat64x4(v)
+ case OpTruncScaledFloat64x8:
+ return rewriteValueAMD64_OpTruncScaledFloat64x8(v)
+ case OpTruncScaledMaskedFloat32x16:
+ return rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v)
+ case OpTruncScaledMaskedFloat32x4:
+ return rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v)
+ case OpTruncScaledMaskedFloat32x8:
+ return rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v)
+ case OpTruncScaledMaskedFloat64x2:
+ return rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v)
+ case OpTruncScaledMaskedFloat64x4:
+ return rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v)
+ case OpTruncScaledMaskedFloat64x8:
+ return rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v)
+ case OpTruncScaledResidueFloat32x16:
+ return rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v)
+ case OpTruncScaledResidueFloat32x4:
+ return rewriteValueAMD64_OpTruncScaledResidueFloat32x4(v)
+ case OpTruncScaledResidueFloat32x8:
+ return rewriteValueAMD64_OpTruncScaledResidueFloat32x8(v)
+ case OpTruncScaledResidueFloat64x2:
+ return rewriteValueAMD64_OpTruncScaledResidueFloat64x2(v)
+ case OpTruncScaledResidueFloat64x4:
+ return rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v)
+ case OpTruncScaledResidueFloat64x8:
+ return rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v)
+ case OpTruncScaledResidueMaskedFloat32x16:
+ return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v)
+ case OpTruncScaledResidueMaskedFloat32x4:
+ return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v)
+ case OpTruncScaledResidueMaskedFloat32x8:
+ return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v)
+ case OpTruncScaledResidueMaskedFloat64x2:
+ return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v)
+ case OpTruncScaledResidueMaskedFloat64x4:
+ return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v)
+ case OpTruncScaledResidueMaskedFloat64x8:
+ return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v)
case OpUnsignedSignedQuadDotProdAccumulateInt32x16:
v.Op = OpAMD64VPDPBUSD512
return true
return true
}
}
+func rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedInt16x16 x y mask)
+ // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedInt16x32 x y mask)
+ // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedInt16x8 x y mask)
+ // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedInt8x16 x y mask)
+ // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedInt8x32 x y mask)
+ // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedInt8x64 x y mask)
+ // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedUint16x16 x y mask)
+ // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedUint16x32 x y mask)
+ // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedUint16x8 x y mask)
+ // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedUint8x16 x y mask)
+ // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedUint8x32 x y mask)
+ // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddSaturatedMaskedUint8x64 x y mask)
+ // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPADDSBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
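+// Each AddSaturatedMasked rule above follows the same shape: the generic
+// mask operand is converted to an AVX-512 K register with the matching
+// VPMOVVec<width>x<count>ToM op, and that converted mask becomes the third
+// operand of the masked VPADDS{B,W} instruction. A minimal sketch of the
+// shared lowering, using a hypothetical helper name (not part of this file):
+//
+//	func lowerMaskedBinary(v *Value, op, toM Op) bool {
+//		x, y, mask := v.Args[0], v.Args[1], v.Args[2]
+//		v.reset(op)
+//		m := v.Block.NewValue0(v.Pos, toM, types.TypeMask)
+//		m.AddArg(mask)
+//		v.AddArg3(x, y, m)
+//		return true
+//	}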
func rewriteValueAMD64_OpAddr(v *Value) bool {
v_0 := v.Args[0]
// match: (Addr {sym} base)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledFloat32x16(v *Value) bool {
v_0 := v.Args[0]
- // match: (CeilWithPrecisionFloat32x16 [a] x)
+ // match: (CeilScaledFloat32x16 [a] x)
// result: (VRNDSCALEPS512 [a+2] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (CeilWithPrecisionFloat32x4 [a] x)
+ // match: (CeilScaledFloat32x4 [a] x)
// result: (VRNDSCALEPS128 [a+2] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (CeilWithPrecisionFloat32x8 [a] x)
+ // match: (CeilScaledFloat32x8 [a] x)
// result: (VRNDSCALEPS256 [a+2] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (CeilWithPrecisionFloat64x2 [a] x)
+ // match: (CeilScaledFloat64x2 [a] x)
// result: (VRNDSCALEPD128 [a+2] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (CeilWithPrecisionFloat64x4 [a] x)
+ // match: (CeilScaledFloat64x4 [a] x)
// result: (VRNDSCALEPD256 [a+2] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (CeilWithPrecisionFloat64x8 [a] x)
+ // match: (CeilScaledFloat64x8 [a] x)
// result: (VRNDSCALEPD512 [a+2] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (CeilWithPrecisionMaskedFloat32x16 [a] x mask)
+ // match: (CeilScaledMaskedFloat32x16 [a] x mask)
// result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (CeilWithPrecisionMaskedFloat32x4 [a] x mask)
+ // match: (CeilScaledMaskedFloat32x4 [a] x mask)
// result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (CeilWithPrecisionMaskedFloat32x8 [a] x mask)
+ // match: (CeilScaledMaskedFloat32x8 [a] x mask)
// result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (CeilWithPrecisionMaskedFloat64x2 [a] x mask)
+ // match: (CeilScaledMaskedFloat64x2 [a] x mask)
// result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (CeilWithPrecisionMaskedFloat64x4 [a] x mask)
+ // match: (CeilScaledMaskedFloat64x4 [a] x mask)
// result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (CeilWithPrecisionMaskedFloat64x8 [a] x mask)
+ // match: (CeilScaledMaskedFloat64x8 [a] x mask)
// result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (CeilScaledResidueFloat32x16 [a] x)
+ // result: (VREDUCEPS512 [a+2] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS512)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (CeilScaledResidueFloat32x4 [a] x)
+ // result: (VREDUCEPS128 [a+2] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS128)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (CeilScaledResidueFloat32x8 [a] x)
+ // result: (VREDUCEPS256 [a+2] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS256)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (CeilScaledResidueFloat64x2 [a] x)
+ // result: (VREDUCEPD128 [a+2] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD128)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (CeilScaledResidueFloat64x4 [a] x)
+ // result: (VREDUCEPD256 [a+2] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD256)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (CeilScaledResidueFloat64x8 [a] x)
+ // result: (VREDUCEPD512 [a+2] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD512)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (CeilScaledResidueMaskedFloat32x16 [a] x mask)
+ // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked512)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (CeilScaledResidueMaskedFloat32x4 [a] x mask)
+ // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked128)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (CeilScaledResidueMaskedFloat32x8 [a] x mask)
+ // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked256)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (CeilScaledResidueMaskedFloat64x2 [a] x mask)
+ // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked128)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (CeilScaledResidueMaskedFloat64x4 [a] x mask)
+ // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked256)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (CeilScaledResidueMaskedFloat64x8 [a] x mask)
+ // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked512)
+ v.AuxInt = int8ToAuxInt(a + 2)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
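+// In the Scaled and ScaledResidue rules, the auxiliary constant a is assumed
+// to carry the fixed-point scale in the upper bits of the VRNDSCALE/VREDUCE
+// immediate; the low two bits select the rounding mode (0 = round to nearest
+// even, 1 = round down, 2 = round up, 3 = truncate). That is why the Ceil*
+// rules emit [a+2], while the Floor*, Round*, and Trunc* families emit
+// [a+1], [a+0], and [a+3] respectively.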
func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return true
}
}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithCeilWithPrecisionFloat32x16 [a] x)
- // result: (VREDUCEPS512 [a+2] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS512)
- v.AuxInt = int8ToAuxInt(a + 2)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithCeilWithPrecisionFloat32x4 [a] x)
- // result: (VREDUCEPS128 [a+2] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS128)
- v.AuxInt = int8ToAuxInt(a + 2)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithCeilWithPrecisionFloat32x8 [a] x)
- // result: (VREDUCEPS256 [a+2] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS256)
- v.AuxInt = int8ToAuxInt(a + 2)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithCeilWithPrecisionFloat64x2 [a] x)
- // result: (VREDUCEPD128 [a+2] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD128)
- v.AuxInt = int8ToAuxInt(a + 2)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithCeilWithPrecisionFloat64x4 [a] x)
- // result: (VREDUCEPD256 [a+2] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD256)
- v.AuxInt = int8ToAuxInt(a + 2)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithCeilWithPrecisionFloat64x8 [a] x)
- // result: (VREDUCEPD512 [a+2] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD512)
- v.AuxInt = int8ToAuxInt(a + 2)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithCeilWithPrecisionMaskedFloat32x16 [a] x mask)
- // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked512)
- v.AuxInt = int8ToAuxInt(a + 2)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithCeilWithPrecisionMaskedFloat32x4 [a] x mask)
- // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked128)
- v.AuxInt = int8ToAuxInt(a + 2)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithCeilWithPrecisionMaskedFloat32x8 [a] x mask)
- // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked256)
- v.AuxInt = int8ToAuxInt(a + 2)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithCeilWithPrecisionMaskedFloat64x2 [a] x mask)
- // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked128)
- v.AuxInt = int8ToAuxInt(a + 2)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithCeilWithPrecisionMaskedFloat64x4 [a] x mask)
- // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked256)
- v.AuxInt = int8ToAuxInt(a + 2)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithCeilWithPrecisionMaskedFloat64x8 [a] x mask)
- // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked512)
- v.AuxInt = int8ToAuxInt(a + 2)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithFloorWithPrecisionFloat32x16 [a] x)
- // result: (VREDUCEPS512 [a+1] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS512)
- v.AuxInt = int8ToAuxInt(a + 1)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithFloorWithPrecisionFloat32x4 [a] x)
- // result: (VREDUCEPS128 [a+1] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS128)
- v.AuxInt = int8ToAuxInt(a + 1)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithFloorWithPrecisionFloat32x8 [a] x)
- // result: (VREDUCEPS256 [a+1] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS256)
- v.AuxInt = int8ToAuxInt(a + 1)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithFloorWithPrecisionFloat64x2 [a] x)
- // result: (VREDUCEPD128 [a+1] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD128)
- v.AuxInt = int8ToAuxInt(a + 1)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithFloorWithPrecisionFloat64x4 [a] x)
- // result: (VREDUCEPD256 [a+1] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD256)
- v.AuxInt = int8ToAuxInt(a + 1)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithFloorWithPrecisionFloat64x8 [a] x)
- // result: (VREDUCEPD512 [a+1] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD512)
- v.AuxInt = int8ToAuxInt(a + 1)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithFloorWithPrecisionMaskedFloat32x16 [a] x mask)
- // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked512)
- v.AuxInt = int8ToAuxInt(a + 1)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithFloorWithPrecisionMaskedFloat32x4 [a] x mask)
- // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked128)
- v.AuxInt = int8ToAuxInt(a + 1)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithFloorWithPrecisionMaskedFloat32x8 [a] x mask)
- // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked256)
- v.AuxInt = int8ToAuxInt(a + 1)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithFloorWithPrecisionMaskedFloat64x2 [a] x mask)
- // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked128)
- v.AuxInt = int8ToAuxInt(a + 1)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithFloorWithPrecisionMaskedFloat64x4 [a] x mask)
- // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked256)
- v.AuxInt = int8ToAuxInt(a + 1)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithFloorWithPrecisionMaskedFloat64x8 [a] x mask)
- // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked512)
- v.AuxInt = int8ToAuxInt(a + 1)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithRoundWithPrecisionFloat32x16 [a] x)
- // result: (VREDUCEPS512 [a+0] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS512)
- v.AuxInt = int8ToAuxInt(a + 0)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithRoundWithPrecisionFloat32x4 [a] x)
- // result: (VREDUCEPS128 [a+0] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS128)
- v.AuxInt = int8ToAuxInt(a + 0)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithRoundWithPrecisionFloat32x8 [a] x)
- // result: (VREDUCEPS256 [a+0] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS256)
- v.AuxInt = int8ToAuxInt(a + 0)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithRoundWithPrecisionFloat64x2 [a] x)
- // result: (VREDUCEPD128 [a+0] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD128)
- v.AuxInt = int8ToAuxInt(a + 0)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithRoundWithPrecisionFloat64x4 [a] x)
- // result: (VREDUCEPD256 [a+0] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD256)
- v.AuxInt = int8ToAuxInt(a + 0)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithRoundWithPrecisionFloat64x8 [a] x)
- // result: (VREDUCEPD512 [a+0] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD512)
- v.AuxInt = int8ToAuxInt(a + 0)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithRoundWithPrecisionMaskedFloat32x16 [a] x mask)
- // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked512)
- v.AuxInt = int8ToAuxInt(a + 0)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithRoundWithPrecisionMaskedFloat32x4 [a] x mask)
- // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked128)
- v.AuxInt = int8ToAuxInt(a + 0)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithRoundWithPrecisionMaskedFloat32x8 [a] x mask)
- // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked256)
- v.AuxInt = int8ToAuxInt(a + 0)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithRoundWithPrecisionMaskedFloat64x2 [a] x mask)
- // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked128)
- v.AuxInt = int8ToAuxInt(a + 0)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithRoundWithPrecisionMaskedFloat64x4 [a] x mask)
- // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked256)
- v.AuxInt = int8ToAuxInt(a + 0)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithRoundWithPrecisionMaskedFloat64x8 [a] x mask)
- // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked512)
- v.AuxInt = int8ToAuxInt(a + 0)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithTruncWithPrecisionFloat32x16 [a] x)
- // result: (VREDUCEPS512 [a+3] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS512)
- v.AuxInt = int8ToAuxInt(a + 3)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithTruncWithPrecisionFloat32x4 [a] x)
- // result: (VREDUCEPS128 [a+3] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS128)
- v.AuxInt = int8ToAuxInt(a + 3)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithTruncWithPrecisionFloat32x8 [a] x)
- // result: (VREDUCEPS256 [a+3] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPS256)
- v.AuxInt = int8ToAuxInt(a + 3)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithTruncWithPrecisionFloat64x2 [a] x)
- // result: (VREDUCEPD128 [a+3] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD128)
- v.AuxInt = int8ToAuxInt(a + 3)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithTruncWithPrecisionFloat64x4 [a] x)
- // result: (VREDUCEPD256 [a+3] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD256)
- v.AuxInt = int8ToAuxInt(a + 3)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v *Value) bool {
- v_0 := v.Args[0]
- // match: (DiffWithTruncWithPrecisionFloat64x8 [a] x)
- // result: (VREDUCEPD512 [a+3] x)
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- v.reset(OpAMD64VREDUCEPD512)
- v.AuxInt = int8ToAuxInt(a + 3)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithTruncWithPrecisionMaskedFloat32x16 [a] x mask)
- // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked512)
- v.AuxInt = int8ToAuxInt(a + 3)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithTruncWithPrecisionMaskedFloat32x4 [a] x mask)
- // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked128)
- v.AuxInt = int8ToAuxInt(a + 3)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithTruncWithPrecisionMaskedFloat32x8 [a] x mask)
- // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPSMasked256)
- v.AuxInt = int8ToAuxInt(a + 3)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithTruncWithPrecisionMaskedFloat64x2 [a] x mask)
- // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked128)
- v.AuxInt = int8ToAuxInt(a + 3)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithTruncWithPrecisionMaskedFloat64x4 [a] x mask)
- // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked256)
- v.AuxInt = int8ToAuxInt(a + 3)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (DiffWithTruncWithPrecisionMaskedFloat64x8 [a] x mask)
- // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- a := auxIntToInt8(v.AuxInt)
- x := v_0
- mask := v_1
- v.reset(OpAMD64VREDUCEPDMasked512)
- v.AuxInt = int8ToAuxInt(a + 3)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
func rewriteValueAMD64_OpDiv16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledFloat32x16(v *Value) bool {
v_0 := v.Args[0]
- // match: (FloorWithPrecisionFloat32x16 [a] x)
+ // match: (FloorScaledFloat32x16 [a] x)
// result: (VRNDSCALEPS512 [a+1] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (FloorWithPrecisionFloat32x4 [a] x)
+ // match: (FloorScaledFloat32x4 [a] x)
// result: (VRNDSCALEPS128 [a+1] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (FloorWithPrecisionFloat32x8 [a] x)
+ // match: (FloorScaledFloat32x8 [a] x)
// result: (VRNDSCALEPS256 [a+1] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (FloorWithPrecisionFloat64x2 [a] x)
+ // match: (FloorScaledFloat64x2 [a] x)
// result: (VRNDSCALEPD128 [a+1] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (FloorWithPrecisionFloat64x4 [a] x)
+ // match: (FloorScaledFloat64x4 [a] x)
// result: (VRNDSCALEPD256 [a+1] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (FloorWithPrecisionFloat64x8 [a] x)
+ // match: (FloorScaledFloat64x8 [a] x)
// result: (VRNDSCALEPD512 [a+1] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (FloorWithPrecisionMaskedFloat32x16 [a] x mask)
+ // match: (FloorScaledMaskedFloat32x16 [a] x mask)
// result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (FloorWithPrecisionMaskedFloat32x4 [a] x mask)
+ // match: (FloorScaledMaskedFloat32x4 [a] x mask)
// result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (FloorWithPrecisionMaskedFloat32x8 [a] x mask)
+ // match: (FloorScaledMaskedFloat32x8 [a] x mask)
// result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (FloorWithPrecisionMaskedFloat64x2 [a] x mask)
+ // match: (FloorScaledMaskedFloat64x2 [a] x mask)
// result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (FloorWithPrecisionMaskedFloat64x4 [a] x mask)
+ // match: (FloorScaledMaskedFloat64x4 [a] x mask)
// result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (FloorWithPrecisionMaskedFloat64x8 [a] x mask)
+ // match: (FloorScaledMaskedFloat64x8 [a] x mask)
// result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (FloorScaledResidueFloat32x16 [a] x)
+ // result: (VREDUCEPS512 [a+1] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS512)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (FloorScaledResidueFloat32x4 [a] x)
+ // result: (VREDUCEPS128 [a+1] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS128)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (FloorScaledResidueFloat32x8 [a] x)
+ // result: (VREDUCEPS256 [a+1] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS256)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (FloorScaledResidueFloat64x2 [a] x)
+ // result: (VREDUCEPD128 [a+1] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD128)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (FloorScaledResidueFloat64x4 [a] x)
+ // result: (VREDUCEPD256 [a+1] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD256)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (FloorScaledResidueFloat64x8 [a] x)
+ // result: (VREDUCEPD512 [a+1] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD512)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (FloorScaledResidueMaskedFloat32x16 [a] x mask)
+ // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked512)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (FloorScaledResidueMaskedFloat32x4 [a] x mask)
+ // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked128)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (FloorScaledResidueMaskedFloat32x8 [a] x mask)
+ // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked256)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (FloorScaledResidueMaskedFloat64x2 [a] x mask)
+ // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked128)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (FloorScaledResidueMaskedFloat64x4 [a] x mask)
+ // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked256)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (FloorScaledResidueMaskedFloat64x8 [a] x mask)
+ // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked512)
+ v.AuxInt = int8ToAuxInt(a + 1)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
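+// The ScaledResidue rules lower to VREDUCE, which yields the residue
+// x - RoundScaled(x) left over after rounding at the given scale, whereas
+// the plain Scaled rules lower to VRNDSCALE, which yields the rounded value
+// itself; both instructions share the immediate encoding described above.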
func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
}
return false
}
-func rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (MulByPowOf2MaskedFloat32x16 x y mask)
- // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VSCALEFPSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (MulByPowOf2MaskedFloat32x4 x y mask)
- // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VSCALEFPSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (MulByPowOf2MaskedFloat32x8 x y mask)
- // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VSCALEFPSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x2(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (MulByPowOf2MaskedFloat64x2 x y mask)
- // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VSCALEFPDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (MulByPowOf2MaskedFloat64x4 x y mask)
- // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VSCALEFPDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (MulByPowOf2MaskedFloat64x8 x y mask)
- // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VSCALEFPDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
func rewriteValueAMD64_OpMulEvenWidenMaskedInt64x2(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedFloat32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt16x16 x y mask)
- // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // match: (MulMaskedFloat32x16 x y mask)
+ // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v.reset(OpAMD64VMULPSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedFloat32x4(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt16x32 x y mask)
- // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // match: (MulMaskedFloat32x4 x y mask)
+ // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v.reset(OpAMD64VMULPSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedFloat32x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt16x8 x y mask)
- // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // match: (MulMaskedFloat32x8 x y mask)
+ // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v.reset(OpAMD64VMULPSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedFloat64x2(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt32x16 x y mask)
- // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+ // match: (MulMaskedFloat64x2 x y mask)
+ // result: (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v.reset(OpAMD64VMULPDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedFloat64x4(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt32x4 x y mask)
- // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+ // match: (MulMaskedFloat64x4 x y mask)
+ // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v.reset(OpAMD64VMULPDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedFloat64x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt32x8 x y mask)
- // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+ // match: (MulMaskedFloat64x8 x y mask)
+ // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v.reset(OpAMD64VMULPDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt16x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt64x2 x y mask)
- // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt16x16 x y mask)
+ // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLQMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v.reset(OpAMD64VPMULLWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt16x32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt64x4 x y mask)
- // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt16x32 x y mask)
+ // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLQMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v.reset(OpAMD64VPMULLWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulLowMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt16x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulLowMaskedInt64x8 x y mask)
- // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt16x8 x y mask)
+ // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULLQMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v.reset(OpAMD64VPMULLWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulMaskedFloat32x16 x y mask)
- // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt32x16 x y mask)
+ // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VMULPSMasked512)
+ v.reset(OpAMD64VPMULLDMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt32x4(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulMaskedFloat32x4 x y mask)
- // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt32x4 x y mask)
+ // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VMULPSMasked128)
+ v.reset(OpAMD64VPMULLDMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt32x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulMaskedFloat32x8 x y mask)
- // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt32x8 x y mask)
+ // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VMULPSMasked256)
+ v.reset(OpAMD64VPMULLDMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt64x2(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulMaskedFloat64x2 x y mask)
- // result: (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt64x2 x y mask)
+ // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VMULPDMasked128)
+ v.reset(OpAMD64VPMULLQMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt64x4(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulMaskedFloat64x4 x y mask)
- // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt64x4 x y mask)
+ // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VMULPDMasked256)
+ v.reset(OpAMD64VPMULLQMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpMulMaskedInt64x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulMaskedFloat64x8 x y mask)
- // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ // match: (MulMaskedInt64x8 x y mask)
+ // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VMULPDMasked512)
+ v.reset(OpAMD64VPMULLQMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
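The MulMasked* rules above all share one shape: the vector-typed mask is converted to a K register with VPMOVVec*ToM, and the multiply itself is the lane-wise low-half product (VPMULLW/VPMULLD/VPMULLQ). A minimal scalar sketch of the intended per-lane behavior, assuming the masked ops zero inactive lanes (the helper below is illustrative only, not part of this change):

package main

import "fmt"

// mulMaskedInt64 mirrors, per lane, what VPMULLQMasked512 is used for above:
// active lanes get the low 64 bits of x*y, inactive lanes are zeroed.
// (Assumption: masked ops zero inactive lanes; this helper is hypothetical.)
func mulMaskedInt64(x, y []int64, mask []bool) []int64 {
	out := make([]int64, len(x))
	for i := range out {
		if mask[i] {
			out[i] = x[i] * y[i] // Go's multiply already keeps the low 64 bits
		}
	}
	return out
}

func main() {
	x := []int64{3, -4, 5, 6}
	y := []int64{7, 8, -9, 10}
	fmt.Println(mulMaskedInt64(x, y, []bool{true, false, true, false})) // [21 0 -45 0]
}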
-func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
- v_0 := v.Args[0]
- // match: (RoundToEven x)
- // result: (ROUNDSD [0] x)
- for {
- x := v_0
- v.reset(OpAMD64ROUNDSD)
- v.AuxInt = int8ToAuxInt(0)
- v.AddArg(x)
- return true
- }
-}
-func rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledFloat32x16(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundWithPrecisionFloat32x16 [a] x)
+ // match: (RoundScaledFloat32x16 [a] x)
// result: (VRNDSCALEPS512 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundWithPrecisionFloat32x4 [a] x)
+ // match: (RoundScaledFloat32x4 [a] x)
// result: (VRNDSCALEPS128 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundWithPrecisionFloat32x8 [a] x)
+ // match: (RoundScaledFloat32x8 [a] x)
// result: (VRNDSCALEPS256 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundWithPrecisionFloat64x2 [a] x)
+ // match: (RoundScaledFloat64x2 [a] x)
// result: (VRNDSCALEPD128 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundWithPrecisionFloat64x4 [a] x)
+ // match: (RoundScaledFloat64x4 [a] x)
// result: (VRNDSCALEPD256 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledFloat64x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundWithPrecisionFloat64x8 [a] x)
+ // match: (RoundScaledFloat64x8 [a] x)
// result: (VRNDSCALEPD512 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundWithPrecisionMaskedFloat32x16 [a] x mask)
+ // match: (RoundScaledMaskedFloat32x16 [a] x mask)
// result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundWithPrecisionMaskedFloat32x4 [a] x mask)
+ // match: (RoundScaledMaskedFloat32x4 [a] x mask)
// result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundWithPrecisionMaskedFloat32x8 [a] x mask)
+ // match: (RoundScaledMaskedFloat32x8 [a] x mask)
// result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundWithPrecisionMaskedFloat64x2 [a] x mask)
+ // match: (RoundScaledMaskedFloat64x2 [a] x mask)
// result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundWithPrecisionMaskedFloat64x4 [a] x mask)
+ // match: (RoundScaledMaskedFloat64x4 [a] x mask)
// result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundScaledMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundWithPrecisionMaskedFloat64x8 [a] x mask)
+ // match: (RoundScaledMaskedFloat64x8 [a] x mask)
// result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
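The RoundScaled* rules lower to VRNDSCALEPS/VRNDSCALEPD with immediate a+0, where (by the usual VRNDSCALE encoding) the low immediate bits select the rounding mode, +0 being round-to-nearest-even, and the upper bits carry the precision. A scalar sketch under those assumptions:

package main

import (
	"fmt"
	"math"
)

// roundScaled is a scalar model of VRNDSCALEPD with rounding mode 0
// (nearest even): round x while keeping prec fractional bits, i.e.
// RoundToEven(x*2^prec) / 2^prec.
func roundScaled(x float64, prec uint) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return math.RoundToEven(x*scale) / scale
}

func main() {
	fmt.Println(roundScaled(1.2345, 2)) // 1.25
	fmt.Println(roundScaled(1.2345, 0)) // 1
}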
+func rewriteValueAMD64_OpRoundScaledResidueFloat32x16(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundScaledResidueFloat32x16 [a] x)
+ // result: (VREDUCEPS512 [a+0] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS512)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueFloat32x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundScaledResidueFloat32x4 [a] x)
+ // result: (VREDUCEPS128 [a+0] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS128)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueFloat32x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundScaledResidueFloat32x8 [a] x)
+ // result: (VREDUCEPS256 [a+0] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS256)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueFloat64x2(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundScaledResidueFloat64x2 [a] x)
+ // result: (VREDUCEPD128 [a+0] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD128)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueFloat64x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundScaledResidueFloat64x4 [a] x)
+ // result: (VREDUCEPD256 [a+0] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD256)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueFloat64x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundScaledResidueFloat64x8 [a] x)
+ // result: (VREDUCEPD512 [a+0] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD512)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (RoundScaledResidueMaskedFloat32x16 [a] x mask)
+ // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked512)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (RoundScaledResidueMaskedFloat32x4 [a] x mask)
+ // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked128)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (RoundScaledResidueMaskedFloat32x8 [a] x mask)
+ // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked256)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (RoundScaledResidueMaskedFloat64x2 [a] x mask)
+ // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked128)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (RoundScaledResidueMaskedFloat64x4 [a] x mask)
+ // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked256)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (RoundScaledResidueMaskedFloat64x8 [a] x mask)
+ // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked512)
+ v.AuxInt = int8ToAuxInt(a + 0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
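The new RoundScaledResidue* rules lower to VREDUCEPS/VREDUCEPD, which returns what is left of the argument after the corresponding VRNDSCALE-style rounding is subtracted out. A scalar sketch of the nearest-even case (illustrative only):

package main

import (
	"fmt"
	"math"
)

// roundScaledResidue is a scalar model of VREDUCEPD with the nearest-even
// rounding mode: the part of x that rounding to prec fractional bits
// throws away.
func roundScaledResidue(x float64, prec uint) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return x - math.RoundToEven(x*scale)/scale
}

func main() {
	fmt.Println(roundScaledResidue(1.2345, 2)) // about -0.0155 (1.2345 - 1.25)
}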
+func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundToEven x)
+ // result: (ROUNDSD [0] x)
+ for {
+ x := v_0
+ v.reset(OpAMD64ROUNDSD)
+ v.AuxInt = int8ToAuxInt(0)
+ v.AddArg(x)
+ return true
+ }
+}
func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return true
}
}
-func rewriteValueAMD64_OpSaturatedAddMaskedInt16x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedInt16x16 x y mask)
- // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedInt16x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedInt16x32 x y mask)
- // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedInt16x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedInt16x8 x y mask)
- // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedInt8x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedInt8x16 x y mask)
- // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSBMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedInt8x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedInt8x32 x y mask)
- // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSBMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedInt8x64(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedInt8x64 x y mask)
- // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSBMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedUint16x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedUint16x16 x y mask)
- // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedUint16x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedUint16x32 x y mask)
- // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedAddMaskedUint16x8 x y mask)
- // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x16 x y mask)
+ // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPADDSWMasked128)
+ v.reset(OpAMD64VPMADDUBSWMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedAddMaskedUint8x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedUint8x16 x y mask)
- // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSBMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedUint8x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedUint8x32 x y mask)
- // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSBMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddMaskedUint8x64(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddMaskedUint8x64 x y mask)
- // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPADDSBMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedSubMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedInt16x16 x y mask)
- // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x32 x y mask)
+ // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSWMasked256)
+ v.reset(OpAMD64VPMADDUBSWMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedSubMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedInt16x32 x y mask)
- // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x64 x y mask)
+ // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSWMasked512)
+ v.reset(OpAMD64VPMADDUBSWMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
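The renamed PairDotProd rules above lower to VPMADDUBSW: each 16-bit output lane is the saturating sum of two adjacent unsigned-byte-times-signed-byte products, which is why the mask is converted with VPMOVVec16x8ToM — AVX-512 masking here applies per 16-bit result lane. A scalar sketch of the per-lane computation (illustrative only):

package main

import "fmt"

func satInt16(v int32) int16 {
	if v > 32767 {
		return 32767
	}
	if v < -32768 {
		return -32768
	}
	return int16(v)
}

// pairDotProdU8S8 mirrors, per output lane, what VPMADDUBSW computes:
// unsigned bytes from x times signed bytes from y, adjacent products summed
// with signed saturation into an int16 lane.
func pairDotProdU8S8(x []uint8, y []int8) []int16 {
	out := make([]int16, len(x)/2)
	for i := range out {
		p := int32(x[2*i])*int32(y[2*i]) + int32(x[2*i+1])*int32(y[2*i+1])
		out[i] = satInt16(p)
	}
	return out
}

func main() {
	x := []uint8{255, 255, 2, 3}
	y := []int8{127, 127, -1, 4}
	fmt.Println(pairDotProdU8S8(x, y)) // [32767 10]
}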
-func rewriteValueAMD64_OpSaturatedSubMaskedInt16x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedSubMaskedInt16x8 x y mask)
- // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPSUBSWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedSubMaskedInt8x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedSubMaskedInt8x16 x y mask)
- // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPSUBSBMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedSubMaskedInt8x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedSubMaskedInt8x32 x y mask)
- // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPSUBSBMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedSubMaskedInt8x64(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedSubMaskedInt8x64 x y mask)
- // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPSUBSBMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedSubMaskedUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedUint16x16 x y mask)
- // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask)
+ // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPSUBSWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedSubMaskedUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedUint16x32 x y mask)
- // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask)
+ // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPSUBSWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedSubMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedUint16x8 x y mask)
- // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask)
+ // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPSUBSWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
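VPDPBUSDS, used by the QuadDotProdAccumulate rules above, extends the same idea to four byte products per 32-bit lane, added into an accumulator with signed saturation. A scalar sketch of one lane; the operand order shown is illustrative only and not taken from this change:

package main

import "fmt"

func satInt32(v int64) int32 {
	if v > 2147483647 {
		return 2147483647
	}
	if v < -2147483648 {
		return -2147483648
	}
	return int32(v)
}

// quadDotProdAccumulate mirrors, per int32 lane, what VPDPBUSDS computes:
// four unsigned bytes times four signed bytes, summed, then added to the
// accumulator with signed saturation.
func quadDotProdAccumulate(acc int32, u []uint8, s []int8) int32 {
	sum := int64(acc)
	for i := 0; i < 4; i++ {
		sum += int64(u[i]) * int64(s[i])
	}
	return satInt32(sum)
}

func main() {
	fmt.Println(quadDotProdAccumulate(100, []uint8{1, 2, 3, 4}, []int8{1, 1, 1, 1}))           // 110
	fmt.Println(quadDotProdAccumulate(2147483647, []uint8{255, 0, 0, 0}, []int8{127, 0, 0, 0})) // 2147483647 (saturated)
}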
-func rewriteValueAMD64_OpSaturatedSubMaskedUint8x16(v *Value) bool {
+func rewriteValueAMD64_OpScaleMaskedFloat32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedUint8x16 x y mask)
- // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ // match: (ScaleMaskedFloat32x16 x y mask)
+ // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSBMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v.reset(OpAMD64VSCALEFPSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedSubMaskedUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpScaleMaskedFloat32x4(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedUint8x32 x y mask)
- // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ // match: (ScaleMaskedFloat32x4 x y mask)
+ // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSBMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v.reset(OpAMD64VSCALEFPSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedSubMaskedUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpScaleMaskedFloat32x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedSubMaskedUint8x64 x y mask)
- // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ // match: (ScaleMaskedFloat32x8 x y mask)
+ // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSBMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v.reset(OpAMD64VSCALEFPSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16(v *Value) bool {
+func rewriteValueAMD64_OpScaleMaskedFloat64x2(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x16 x y mask)
- // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // match: (ScaleMaskedFloat64x2 x y mask)
+ // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMADDUBSWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v.reset(OpAMD64VSCALEFPDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpScaleMaskedFloat64x4(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x32 x y mask)
- // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // match: (ScaleMaskedFloat64x4 x y mask)
+ // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMADDUBSWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v.reset(OpAMD64VSCALEFPDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpScaleMaskedFloat64x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x64 x y mask)
- // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // match: (ScaleMaskedFloat64x8 x y mask)
+ // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMADDUBSWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v.reset(OpAMD64VSCALEFPDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
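The ScaleMasked* rules (formerly the VSCALEF ops listed elsewhere) lower to VSCALEFPS/VSCALEFPD, which for finite inputs computes x * 2^floor(y). A scalar sketch for float64 (illustrative only):

package main

import (
	"fmt"
	"math"
)

// scale is a scalar model of VSCALEFPD for finite inputs: x * 2^floor(y).
func scale(x, y float64) float64 {
	return math.Ldexp(x, int(math.Floor(y)))
}

func main() {
	fmt.Println(scale(3.0, 4.0))  // 48
	fmt.Println(scale(1.5, -1.2)) // 0.375 (2^floor(-1.2) = 2^-2)
}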
-func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask)
- // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask)
- // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask)
- // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
func rewriteValueAMD64_OpSelect0(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
return true
}
}
+func rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedInt16x16 x y mask)
+ // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedInt16x32 x y mask)
+ // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedInt16x8 x y mask)
+ // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedInt8x16 x y mask)
+ // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedInt8x32 x y mask)
+ // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedInt8x64 x y mask)
+ // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedUint16x16 x y mask)
+ // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedUint16x32 x y mask)
+ // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedUint16x8 x y mask)
+ // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedUint8x16 x y mask)
+ // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedUint8x32 x y mask)
+ // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (SubSaturatedMaskedUint8x64 x y mask)
+ // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPSUBSBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
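The new SubSaturatedMasked* rules lower to VPSUBSB/VPSUBSW: element-wise subtraction that clamps to the signed range instead of wrapping. A scalar sketch of the int8 case (illustrative only):

package main

import "fmt"

// subSaturatedInt8 mirrors, per lane, what VPSUBSB computes: x - y clamped
// to the int8 range instead of wrapping.
func subSaturatedInt8(x, y int8) int8 {
	d := int16(x) - int16(y)
	if d > 127 {
		return 127
	}
	if d < -128 {
		return -128
	}
	return int8(d)
}

func main() {
	fmt.Println(subSaturatedInt8(-100, 100)) // -128 (saturated)
	fmt.Println(subSaturatedInt8(100, -100)) // 127  (saturated)
	fmt.Println(subSaturatedInt8(10, 3))     // 7
}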
func rewriteValueAMD64_OpTrunc(v *Value) bool {
v_0 := v.Args[0]
// match: (Trunc x)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledFloat32x16(v *Value) bool {
v_0 := v.Args[0]
- // match: (TruncWithPrecisionFloat32x16 [a] x)
+ // match: (TruncScaledFloat32x16 [a] x)
// result: (VRNDSCALEPS512 [a+3] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (TruncWithPrecisionFloat32x4 [a] x)
+ // match: (TruncScaledFloat32x4 [a] x)
// result: (VRNDSCALEPS128 [a+3] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (TruncWithPrecisionFloat32x8 [a] x)
+ // match: (TruncScaledFloat32x8 [a] x)
// result: (VRNDSCALEPS256 [a+3] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (TruncWithPrecisionFloat64x2 [a] x)
+ // match: (TruncScaledFloat64x2 [a] x)
// result: (VRNDSCALEPD128 [a+3] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (TruncWithPrecisionFloat64x4 [a] x)
+ // match: (TruncScaledFloat64x4 [a] x)
// result: (VRNDSCALEPD256 [a+3] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledFloat64x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (TruncWithPrecisionFloat64x8 [a] x)
+ // match: (TruncScaledFloat64x8 [a] x)
// result: (VRNDSCALEPD512 [a+3] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (TruncWithPrecisionMaskedFloat32x16 [a] x mask)
+ // match: (TruncScaledMaskedFloat32x16 [a] x mask)
// result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (TruncWithPrecisionMaskedFloat32x4 [a] x mask)
+ // match: (TruncScaledMaskedFloat32x4 [a] x mask)
// result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (TruncWithPrecisionMaskedFloat32x8 [a] x mask)
+ // match: (TruncScaledMaskedFloat32x8 [a] x mask)
// result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (TruncWithPrecisionMaskedFloat64x2 [a] x mask)
+ // match: (TruncScaledMaskedFloat64x2 [a] x mask)
// result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (TruncWithPrecisionMaskedFloat64x4 [a] x mask)
+ // match: (TruncScaledMaskedFloat64x4 [a] x mask)
// result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (TruncWithPrecisionMaskedFloat64x8 [a] x mask)
+ // match: (TruncScaledMaskedFloat64x8 [a] x mask)
// result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
+func rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (TruncScaledResidueFloat32x16 [a] x)
+ // result: (VREDUCEPS512 [a+3] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS512)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueFloat32x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (TruncScaledResidueFloat32x4 [a] x)
+ // result: (VREDUCEPS128 [a+3] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS128)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueFloat32x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (TruncScaledResidueFloat32x8 [a] x)
+ // result: (VREDUCEPS256 [a+3] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPS256)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueFloat64x2(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (TruncScaledResidueFloat64x2 [a] x)
+ // result: (VREDUCEPD128 [a+3] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD128)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (TruncScaledResidueFloat64x4 [a] x)
+ // result: (VREDUCEPD256 [a+3] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD256)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (TruncScaledResidueFloat64x8 [a] x)
+ // result: (VREDUCEPD512 [a+3] x)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ v.reset(OpAMD64VREDUCEPD512)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (TruncScaledResidueMaskedFloat32x16 [a] x mask)
+ // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked512)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (TruncScaledResidueMaskedFloat32x4 [a] x mask)
+ // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked128)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (TruncScaledResidueMaskedFloat32x8 [a] x mask)
+ // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPSMasked256)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (TruncScaledResidueMaskedFloat64x2 [a] x mask)
+ // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked128)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (TruncScaledResidueMaskedFloat64x4 [a] x mask)
+ // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked256)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (TruncScaledResidueMaskedFloat64x8 [a] x mask)
+ // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VREDUCEPDMasked512)
+ v.AuxInt = int8ToAuxInt(a + 3)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
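TruncScaledResidue* is the same VREDUCE lowering as RoundScaledResidue* earlier, but with a+3 in the immediate selecting the truncate rounding mode rather than nearest-even. A scalar sketch under that assumption:

package main

import (
	"fmt"
	"math"
)

// truncScaledResidue is a scalar model of VREDUCEPD with the truncate
// rounding mode (the +3 in the immediate): the fractional part left after
// truncating x to prec fractional bits.
func truncScaledResidue(x float64, prec uint) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return x - math.Trunc(x*scale)/scale
}

func main() {
	fmt.Println(truncScaledResidue(1.7, 0)) // about 0.7
	fmt.Println(truncScaledResidue(1.7, 2)) // about 0.2 (1.7 - 6/4)
}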
func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
addF(simdPackage, "Uint64x2.AddMasked", opLen3(ssa.OpAddMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.AddMasked", opLen3(ssa.OpAddMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.AddMasked", opLen3(ssa.OpAddMaskedUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.AddPairs", opLen2(ssa.OpAddPairsFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.AddPairs", opLen2(ssa.OpAddPairsInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.AddPairs", opLen2(ssa.OpAddPairsInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.AddPairs", opLen2(ssa.OpAddPairsInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.AddPairs", opLen2(ssa.OpAddPairsInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.AddPairs", opLen2(ssa.OpAddPairsUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.AddPairs", opLen2(ssa.OpAddPairsUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.AddPairs", opLen2(ssa.OpAddPairsUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.AddPairs", opLen2(ssa.OpAddPairsUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.AddSaturated", opLen2(ssa.OpAddSaturatedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.AddSaturated", opLen2(ssa.OpAddSaturatedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.AddSaturated", opLen2(ssa.OpAddSaturatedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.AddSaturated", opLen2(ssa.OpAddSaturatedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Ceil", opLen1(ssa.OpCeilFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Floor", opLen1(ssa.OpFloorFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Mul", opLen2(ssa.OpMulFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Mul", opLen2(ssa.OpMulFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Mul", opLen2(ssa.OpMulFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Mul", opLen2(ssa.OpMulInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.Mul", opLen2(ssa.OpMulInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Mul", opLen2(ssa.OpMulInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.Mul", opLen2(ssa.OpMulInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.Mul", opLen2(ssa.OpMulInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.Mul", opLen2(ssa.OpMulInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.Mul", opLen2(ssa.OpMulInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.Mul", opLen2(ssa.OpMulInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.Mul", opLen2(ssa.OpMulInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.MulLow", opLen2(ssa.OpMulLowInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.MulLow", opLen2(ssa.OpMulLowInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.MulLow", opLen2(ssa.OpMulLowInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.MulLow", opLen2(ssa.OpMulLowInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.MulLow", opLen2(ssa.OpMulLowInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.MulLow", opLen2(ssa.OpMulLowInt64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.MulMasked", opLen3(ssa.OpMulMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.MulMasked", opLen3(ssa.OpMulMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.MulMasked", opLen3(ssa.OpMulMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.MulMasked", opLen3(ssa.OpMulMaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.MulMasked", opLen3(ssa.OpMulMaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.MulMasked", opLen3(ssa.OpMulMaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.MulMasked", opLen3(ssa.OpMulMaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.MulMasked", opLen3(ssa.OpMulMaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.MulMasked", opLen3(ssa.OpMulMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x2.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x2.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Int8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Uint64x2.SubMasked", opLen3(ssa.OpSubMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.SubMasked", opLen3(ssa.OpSubMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.SubMasked", opLen3(ssa.OpSubMaskedUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.SubPairs", opLen2(ssa.OpSubPairsFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.SubPairs", opLen2(ssa.OpSubPairsInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.SubPairs", opLen2(ssa.OpSubPairsInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.SubPairs", opLen2(ssa.OpSubPairsInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.SubPairs", opLen2(ssa.OpSubPairsInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.SubPairs", opLen2(ssa.OpSubPairsUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.SubPairs", opLen2(ssa.OpSubPairsUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.SubPairs", opLen2(ssa.OpSubPairsUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.SubPairs", opLen2(ssa.OpSubPairsUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.SubSaturated", opLen2(ssa.OpSubSaturatedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.SubSaturated", opLen2(ssa.OpSubSaturatedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.SubSaturated", opLen2(ssa.OpSubSaturatedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.SubSaturated", opLen2(ssa.OpSubSaturatedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Trunc", opLen1(ssa.OpTruncFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
testFloat64x2Binary(t, simd.Float64x2.Mul, mulSlice[float64])
testFloat64x4Binary(t, simd.Float64x4.Mul, mulSlice[float64])
- testInt16x16Binary(t, simd.Int16x16.MulLow, mulSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.MulLow, mulSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.MulLow, mulSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.MulLow, mulSlice[int32])
+ testInt16x16Binary(t, simd.Int16x16.Mul, mulSlice[int16])
+ testInt16x8Binary(t, simd.Int16x8.Mul, mulSlice[int16])
+ testInt32x4Binary(t, simd.Int32x4.Mul, mulSlice[int32])
+ testInt32x8Binary(t, simd.Int32x8.Mul, mulSlice[int32])
- // testInt8x16Binary(t, simd.Int8x16.MulLow, mulSlice[int8]) // nope
- // testInt8x32Binary(t, simd.Int8x32.MulLow, mulSlice[int8])
+ // testInt8x16Binary(t, simd.Int8x16.Mul, mulSlice[int8]) // nope
+ // testInt8x32Binary(t, simd.Int8x32.Mul, mulSlice[int8])
- // TODO we should be able to do these, there's no difference between signed/unsigned mulLow
- // testUint16x16Binary(t, simd.Uint16x16.MulLow, mulSlice[uint16])
- // testUint16x8Binary(t, simd.Uint16x8.MulLow, mulSlice[uint16])
- // testUint32x4Binary(t, simd.Uint32x4.MulLow, mulSlice[uint32])
- // testUint32x8Binary(t, simd.Uint32x8.MulLow, mulSlice[uint32])
- // testUint64x2Binary(t, simd.Uint64x2.MulLow, mulSlice[uint64])
- // testUint64x4Binary(t, simd.Uint64x4.MulLow, mulSlice[uint64])
+ // TODO: we should be able to do these; the low half of a multiply is the same for signed and unsigned, so Mul could cover the unsigned types too (see the note after this function).
+ // testUint16x16Binary(t, simd.Uint16x16.Mul, mulSlice[uint16])
+ // testUint16x8Binary(t, simd.Uint16x8.Mul, mulSlice[uint16])
+ // testUint32x4Binary(t, simd.Uint32x4.Mul, mulSlice[uint32])
+ // testUint32x8Binary(t, simd.Uint32x8.Mul, mulSlice[uint32])
+ // testUint64x2Binary(t, simd.Uint64x2.Mul, mulSlice[uint64])
+ // testUint64x4Binary(t, simd.Uint64x4.Mul, mulSlice[uint64])
- // testUint8x16Binary(t, simd.Uint8x16.MulLow, mulSlice[uint8]) // nope
- // testUint8x32Binary(t, simd.Uint8x32.MulLow, mulSlice[uint8])
+ // testUint8x16Binary(t, simd.Uint8x16.Mul, mulSlice[uint8]) // nope
+ // testUint8x32Binary(t, simd.Uint8x32.Mul, mulSlice[uint8])
if simd.HasAVX512() {
- testInt64x2Binary(t, simd.Int64x2.MulLow, mulSlice[int64]) // avx512 only
- testInt64x4Binary(t, simd.Int64x4.MulLow, mulSlice[int64])
+ testInt64x2Binary(t, simd.Int64x2.Mul, mulSlice[int64]) // avx512 only
+ testInt64x4Binary(t, simd.Int64x4.Mul, mulSlice[int64])
testFloat32x16Binary(t, simd.Float32x16.Mul, mulSlice[float32])
testFloat64x8Binary(t, simd.Float64x8.Mul, mulSlice[float64])
- // testInt8x64Binary(t, simd.Int8x64.MulLow, mulSlice[int8]) // nope
- testInt16x32Binary(t, simd.Int16x32.MulLow, mulSlice[int16])
- testInt32x16Binary(t, simd.Int32x16.MulLow, mulSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.MulLow, mulSlice[int64])
- // testUint8x64Binary(t, simd.Uint8x64.MulLow, mulSlice[uint8]) // nope
+ // testInt8x64Binary(t, simd.Int8x64.Mul, mulSlice[int8]) // nope
+ testInt16x32Binary(t, simd.Int16x32.Mul, mulSlice[int16])
+ testInt32x16Binary(t, simd.Int32x16.Mul, mulSlice[int32])
+ testInt64x8Binary(t, simd.Int64x8.Mul, mulSlice[int64])
+ // testUint8x64Binary(t, simd.Uint8x64.Mul, mulSlice[uint8]) // nope
// TODO: the signed Mul variants should cover these unsigned cases.
- // testUint16x32Binary(t, simd.Uint16x32.MulLow, mulSlice[uint16])
- // testUint32x16Binary(t, simd.Uint32x16.MulLow, mulSlice[uint32])
- // testUint64x8Binary(t, simd.Uint64x8.MulLow, mulSlice[uint64])
+ // testUint16x32Binary(t, simd.Uint16x32.Mul, mulSlice[uint16])
+ // testUint32x16Binary(t, simd.Uint32x16.Mul, mulSlice[uint32])
+ // testUint64x8Binary(t, simd.Uint64x8.Mul, mulSlice[uint64])
}
}
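// Editor's note (illustrative, not part of this patch): the TODO comments in
// the test above rely on a two's-complement property: the low half of a
// product does not depend on whether its operands are read as signed or
// unsigned, so the signed Mul intrinsics could in principle serve the
// unsigned element types as well. A minimal stand-alone check of that
// property (hypothetical file, not part of the test suite):
//
//	package main
//
//	import "fmt"
//
//	func main() {
//		a, b := int16(-12345), int16(7321)
//		signedLow := uint16(a * b)            // low 16 bits of the signed product
//		unsignedLow := uint16(a) * uint16(b)  // low 16 bits of the unsigned product
//		fmt.Println(signedLow == unsignedLow) // true for any pair of inputs
//	}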
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8
+/* AddPairs */
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x4) AddPairs(y Float32x4) Float32x4
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x8) AddPairs(y Float32x8) Float32x8
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x2) AddPairs(y Float64x2) Float64x2
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x4) AddPairs(y Float64x4) Float64x4
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Int16x8) AddPairs(y Int16x8) Int16x8
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Int16x16) AddPairs(y Int16x16) Int16x16
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Int32x4) AddPairs(y Int32x4) Int32x4
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Int32x8) AddPairs(y Int32x8) Int32x8
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
+
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
+
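// Editor's note (illustrative sketch, not part of the generated API): a
// scalar reference for the 128-bit float form, mirroring the lane order
// stated in the doc comments above; addPairsRef is a hypothetical name.
//
//	func addPairsRef(x, y [4]float32) [4]float32 {
//		return [4]float32{y[0] + y[1], y[2] + y[3], x[0] + x[1], x[2] + x[3]}
//	}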
+/* AddPairsSaturated */
+
+// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDSW, CPU Feature: AVX
+func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
+
+// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+//
+// Asm: VPHADDSW, CPU Feature: AVX2
+func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
+
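// Editor's note (illustrative, not part of the generated API): "with
// saturation" means each pair sum is clamped to the int16 range instead of
// wrapping. For example:
//
//	// pair sum 30000 + 10000 = 40000, which does not fit in int16:
//	// with saturation the result lane holds 32767; plain wrapping addition
//	// would give 40000 - 65536 = -25536.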
+/* AddSaturated */
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Int8x16) AddSaturated(y Int8x16) Int8x16
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Int8x32) AddSaturated(y Int8x32) Int8x32
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Int8x64) AddSaturated(y Int8x64) Int8x64
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Int16x8) AddSaturated(y Int16x8) Int16x8
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Int16x16) AddSaturated(y Int16x16) Int16x16
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Int16x32) AddSaturated(y Int16x32) Int16x32
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32
+
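// Editor's note (illustrative sketch, not part of this patch): a scalar
// reference for the signed byte case; addSatInt8 is a hypothetical name.
//
//	func addSatInt8(a, b int8) int8 {
//		s := int16(a) + int16(b) // widen so the true sum is representable
//		switch {
//		case s > 127:
//			return 127
//		case s < -128:
//			return -128
//		}
//		return int8(s)
//	}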
+/* AddSaturatedMasked */
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Int8x16) AddSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Int8x32) AddSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Int8x64) AddSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Int16x8) AddSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Int16x16) AddSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Int16x32) AddSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Uint8x16) AddSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Uint8x32) AddSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSB, CPU Feature: AVX512BW
+func (x Uint8x64) AddSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Uint16x8) AddSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Uint16x16) AddSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
+
+// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPADDSW, CPU Feature: AVX512BW
+func (x Uint16x32) AddSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
+
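The masked variants only say the operation "is applied selectively under a write mask"; a rough scalar sketch of that selection follows. Leaving unselected lanes zero is an assumption for illustration (matching AVX-512 zeroing-mask behavior), not something these comments state.

// addSaturatedMaskedInt8 models the per-lane write mask; satAdd stands in
// for the saturating add sketched earlier.
func addSaturatedMaskedInt8(x, y []int8, mask []bool, satAdd func(a, b int8) int8) []int8 {
	out := make([]int8, len(x))
	for i := range x {
		if mask[i] {
			out[i] = satAdd(x[i], y[i]) // lane selected by the mask
		}
		// unselected lanes stay zero (assumed zeroing semantics)
	}
	return out
}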
/* AddSub */
// AddSub subtracts even elements and adds odd elements of two vectors.
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Ceil() Float64x4
-/* CeilWithPrecision */
+/* CeilScaled */
-// CeilWithPrecision rounds elements up with specified precision.
+// CeilScaled rounds elements up with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) CeilWithPrecision(prec uint8) Float32x4
+func (x Float32x4) CeilScaled(prec uint8) Float32x4
-// CeilWithPrecision rounds elements up with specified precision.
+// CeilScaled rounds elements up with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) CeilWithPrecision(prec uint8) Float32x8
+func (x Float32x8) CeilScaled(prec uint8) Float32x8
-// CeilWithPrecision rounds elements up with specified precision.
+// CeilScaled rounds elements up with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) CeilWithPrecision(prec uint8) Float32x16
+func (x Float32x16) CeilScaled(prec uint8) Float32x16
-// CeilWithPrecision rounds elements up with specified precision.
+// CeilScaled rounds elements up with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) CeilWithPrecision(prec uint8) Float64x2
+func (x Float64x2) CeilScaled(prec uint8) Float64x2
-// CeilWithPrecision rounds elements up with specified precision.
+// CeilScaled rounds elements up with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) CeilWithPrecision(prec uint8) Float64x4
+func (x Float64x4) CeilScaled(prec uint8) Float64x4
-// CeilWithPrecision rounds elements up with specified precision.
+// CeilScaled rounds elements up with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) CeilWithPrecision(prec uint8) Float64x8
+func (x Float64x8) CeilScaled(prec uint8) Float64x8
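A scalar sketch of "rounds elements up with specified precision", under the assumption that prec selects the number of fraction bits kept, i.e. rounding up to a multiple of 2^-prec as VRNDSCALEPS/VRNDSCALEPD do; illustration only, and it needs import "math".

// ceilScaled models one lane of CeilScaled: scale by 2^prec, round up, then
// scale back down, so e.g. prec=1 rounds up to the nearest 0.5.
func ceilScaled(x float64, prec uint8) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return math.Ceil(x*scale) / scale
}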
-/* CeilWithPrecisionMasked */
+/* CeilScaledMasked */
-// CeilWithPrecisionMasked rounds elements up with specified precision.
+// CeilScaledMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) CeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
+func (x Float32x4) CeilScaledMasked(prec uint8, mask Mask32x4) Float32x4
-// CeilWithPrecisionMasked rounds elements up with specified precision.
+// CeilScaledMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) CeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
+func (x Float32x8) CeilScaledMasked(prec uint8, mask Mask32x8) Float32x8
-// CeilWithPrecisionMasked rounds elements up with specified precision.
+// CeilScaledMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) CeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
+func (x Float32x16) CeilScaledMasked(prec uint8, mask Mask32x16) Float32x16
-// CeilWithPrecisionMasked rounds elements up with specified precision.
+// CeilScaledMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) CeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
+func (x Float64x2) CeilScaledMasked(prec uint8, mask Mask64x2) Float64x2
-// CeilWithPrecisionMasked rounds elements up with specified precision.
+// CeilScaledMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) CeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
+func (x Float64x4) CeilScaledMasked(prec uint8, mask Mask64x4) Float64x4
-// CeilWithPrecisionMasked rounds elements up with specified precision.
+// CeilScaledMasked rounds elements up with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) CeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
+func (x Float64x8) CeilScaledMasked(prec uint8, mask Mask64x8) Float64x8
+
+/* CeilScaledResidue */
+
+// CeilScaledResidue computes the difference after ceiling with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) CeilScaledResidue(prec uint8) Float32x4
+
+// CeilScaledResidue computes the difference after ceiling with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) CeilScaledResidue(prec uint8) Float32x8
+
+// CeilScaledResidue computes the difference after ceiling with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) CeilScaledResidue(prec uint8) Float32x16
+
+// CeilScaledResidue computes the difference after ceiling with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) CeilScaledResidue(prec uint8) Float64x2
+
+// CeilScaledResidue computes the difference after ceiling with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4
+
+// CeilScaledResidue computes the difference after ceiling with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8
+
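A scalar sketch of the residue form, reusing the ceilScaled sketch above: per the comments it is the difference between the input and its ceiling at the given precision, which is also how VREDUCEPS/VREDUCEPD are defined; illustration only.

// ceilScaledResidue models one lane of CeilScaledResidue as what is left of
// the input once its scaled ceiling is subtracted out.
func ceilScaledResidue(x float64, prec uint8) float64 {
	return x - ceilScaled(x, prec)
}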
+/* CeilScaledResidueMasked */
+
+// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) CeilScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
+
+// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) CeilScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
+
+// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) CeilScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
+
+// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) CeilScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
+
+// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) CeilScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
+
+// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) CeilScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
/* Compress */
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16
-/* DiffWithCeilWithPrecision */
+/* Div */
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+// Div divides elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithCeilWithPrecision(prec uint8) Float32x4
+// Asm: VDIVPS, CPU Feature: AVX
+func (x Float32x4) Div(y Float32x4) Float32x4
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+// Div divides elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithCeilWithPrecision(prec uint8) Float32x8
+// Asm: VDIVPS, CPU Feature: AVX
+func (x Float32x8) Div(y Float32x8) Float32x8
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+// Div divides elements of two vectors.
//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithCeilWithPrecision(prec uint8) Float32x16
+// Asm: VDIVPS, CPU Feature: AVX512F
+func (x Float32x16) Div(y Float32x16) Float32x16
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+// Div divides elements of two vectors.
//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithCeilWithPrecision(prec uint8) Float64x2
+// Asm: VDIVPD, CPU Feature: AVX
+func (x Float64x2) Div(y Float64x2) Float64x2
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithCeilWithPrecision(prec uint8) Float64x4
-
-// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithCeilWithPrecision(prec uint8) Float64x8
-
-/* DiffWithCeilWithPrecisionMasked */
-
-// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
-
-// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
-
-// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
-
-// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
-
-// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
-
-// DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithCeilWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* DiffWithFloorWithPrecision */
-
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithFloorWithPrecision(prec uint8) Float32x4
-
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithFloorWithPrecision(prec uint8) Float32x8
-
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithFloorWithPrecision(prec uint8) Float32x16
-
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithFloorWithPrecision(prec uint8) Float64x2
-
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithFloorWithPrecision(prec uint8) Float64x4
-
-// DiffWithFloorWithPrecision computes the difference after flooring with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithFloorWithPrecision(prec uint8) Float64x8
-
-/* DiffWithFloorWithPrecisionMasked */
-
-// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
-
-// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
-
-// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
-
-// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
-
-// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
-
-// DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithFloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* DiffWithRoundWithPrecision */
-
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithRoundWithPrecision(prec uint8) Float32x4
-
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithRoundWithPrecision(prec uint8) Float32x8
-
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithRoundWithPrecision(prec uint8) Float32x16
-
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithRoundWithPrecision(prec uint8) Float64x2
-
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithRoundWithPrecision(prec uint8) Float64x4
-
-// DiffWithRoundWithPrecision computes the difference after rounding with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithRoundWithPrecision(prec uint8) Float64x8
-
-/* DiffWithRoundWithPrecisionMasked */
-
-// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
-
-// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
-
-// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
-
-// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
-
-// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
-
-// DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithRoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* DiffWithTruncWithPrecision */
-
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithTruncWithPrecision(prec uint8) Float32x4
-
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithTruncWithPrecision(prec uint8) Float32x8
-
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithTruncWithPrecision(prec uint8) Float32x16
-
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithTruncWithPrecision(prec uint8) Float64x2
-
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithTruncWithPrecision(prec uint8) Float64x4
-
-// DiffWithTruncWithPrecision computes the difference after truncating with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithTruncWithPrecision(prec uint8) Float64x8
-
-/* DiffWithTruncWithPrecisionMasked */
-
-// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
-
-// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
-
-// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
-
-// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
-
-// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
-
-// DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) DiffWithTruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* Div */
-
-// Div divides elements of two vectors.
-//
-// Asm: VDIVPS, CPU Feature: AVX
-func (x Float32x4) Div(y Float32x4) Float32x4
-
-// Div divides elements of two vectors.
-//
-// Asm: VDIVPS, CPU Feature: AVX
-func (x Float32x8) Div(y Float32x8) Float32x8
-
-// Div divides elements of two vectors.
-//
-// Asm: VDIVPS, CPU Feature: AVX512F
-func (x Float32x16) Div(y Float32x16) Float32x16
-
-// Div divides elements of two vectors.
-//
-// Asm: VDIVPD, CPU Feature: AVX
-func (x Float64x2) Div(y Float64x2) Float64x2
-
-// Div divides elements of two vectors.
+// Div divides elements of two vectors.
//
// Asm: VDIVPD, CPU Feature: AVX
func (x Float64x4) Div(y Float64x4) Float64x4
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Floor() Float64x4
-/* FloorWithPrecision */
+/* FloorScaled */
+
+// FloorScaled rounds elements down with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x4) FloorScaled(prec uint8) Float32x4
-// FloorWithPrecision rounds elements down with specified precision.
+// FloorScaled rounds elements down with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) FloorWithPrecision(prec uint8) Float32x4
+func (x Float32x8) FloorScaled(prec uint8) Float32x8
+
+// FloorScaled rounds elements down with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x16) FloorScaled(prec uint8) Float32x16
+
+// FloorScaled rounds elements down with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x2) FloorScaled(prec uint8) Float64x2
+
+// FloorScaled rounds elements down with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x4) FloorScaled(prec uint8) Float64x4
+
+// FloorScaled rounds elements down with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x8) FloorScaled(prec uint8) Float64x8
+
+/* FloorScaledMasked */
+
+// FloorScaledMasked rounds elements down with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x4) FloorScaledMasked(prec uint8, mask Mask32x4) Float32x4
+
+// FloorScaledMasked rounds elements down with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x8) FloorScaledMasked(prec uint8, mask Mask32x8) Float32x8
+
+// FloorScaledMasked rounds elements down with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x16) FloorScaledMasked(prec uint8, mask Mask32x16) Float32x16
+
+// FloorScaledMasked rounds elements down with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x2) FloorScaledMasked(prec uint8, mask Mask64x2) Float64x2
+
+// FloorScaledMasked rounds elements down with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x4) FloorScaledMasked(prec uint8, mask Mask64x4) Float64x4
+
+// FloorScaledMasked rounds elements down with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x8) FloorScaledMasked(prec uint8, mask Mask64x8) Float64x8
+
+/* FloorScaledResidue */
+
+// FloorScaledResidue computes the difference after flooring with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4
-// FloorWithPrecision rounds elements down with specified precision.
+// FloorScaledResidue computes the difference after flooring with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) FloorWithPrecision(prec uint8) Float32x8
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8
-// FloorWithPrecision rounds elements down with specified precision.
+// FloorScaledResidue computes the difference after flooring with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) FloorWithPrecision(prec uint8) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16
-// FloorWithPrecision rounds elements down with specified precision.
+// FloorScaledResidue computes the difference after flooring with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) FloorWithPrecision(prec uint8) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2
-// FloorWithPrecision rounds elements down with specified precision.
+// FloorScaledResidue computes the difference after flooring with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) FloorWithPrecision(prec uint8) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4
-// FloorWithPrecision rounds elements down with specified precision.
+// FloorScaledResidue computes the difference after flooring with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) FloorWithPrecision(prec uint8) Float64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8
-/* FloorWithPrecisionMasked */
+/* FloorScaledResidueMasked */
-// FloorWithPrecisionMasked rounds elements down with specified precision.
+// FloorScaledResidueMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) FloorWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) FloorScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-// FloorWithPrecisionMasked rounds elements down with specified precision.
+// FloorScaledResidueMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) FloorWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) FloorScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-// FloorWithPrecisionMasked rounds elements down with specified precision.
+// FloorScaledResidueMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) FloorWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) FloorScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-// FloorWithPrecisionMasked rounds elements down with specified precision.
+// FloorScaledResidueMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) FloorWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) FloorScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-// FloorWithPrecisionMasked rounds elements down with specified precision.
+// FloorScaledResidueMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) FloorWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) FloorScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-// FloorWithPrecisionMasked rounds elements down with specified precision.
+// FloorScaledResidueMasked computes the difference after flooring with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) FloorWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) FloorScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
/* FusedMultiplyAdd */
// Asm: VMULPD, CPU Feature: AVX512F
func (x Float64x8) Mul(y Float64x8) Float64x8
-/* MulByPowOf2 */
-
-// MulByPowOf2 multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x4) MulByPowOf2(y Float32x4) Float32x4
-
-// MulByPowOf2 multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x8) MulByPowOf2(y Float32x8) Float32x8
-
-// MulByPowOf2 multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x16) MulByPowOf2(y Float32x16) Float32x16
-
-// MulByPowOf2 multiplies elements by a power of 2.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x2) MulByPowOf2(y Float64x2) Float64x2
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Int16x8) Mul(y Int16x8) Int16x8
-// MulByPowOf2 multiplies elements by a power of 2.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Int16x16) Mul(y Int16x16) Int16x16
-// MulByPowOf2 multiplies elements by a power of 2.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8
-
-/* MulByPowOf2Masked */
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Int16x32) Mul(y Int16x32) Int16x32
-// MulByPowOf2Masked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x4) MulByPowOf2Masked(y Float32x4, mask Mask32x4) Float32x4
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Int32x4) Mul(y Int32x4) Int32x4
-// MulByPowOf2Masked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x8) MulByPowOf2Masked(y Float32x8, mask Mask32x8) Float32x8
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Int32x8) Mul(y Int32x8) Int32x8
-// MulByPowOf2Masked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPS, CPU Feature: AVX512F
-func (x Float32x16) MulByPowOf2Masked(y Float32x16, mask Mask32x16) Float32x16
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Int32x16) Mul(y Int32x16) Int32x16
-// MulByPowOf2Masked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x2) MulByPowOf2Masked(y Float64x2, mask Mask64x2) Float64x2
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Int64x2) Mul(y Int64x2) Int64x2
-// MulByPowOf2Masked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x4) MulByPowOf2Masked(y Float64x4, mask Mask64x4) Float64x4
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Int64x4) Mul(y Int64x4) Int64x4
-// MulByPowOf2Masked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VSCALEFPD, CPU Feature: AVX512F
-func (x Float64x8) MulByPowOf2Masked(y Float64x8, mask Mask64x8) Float64x8
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Int64x8) Mul(y Int64x8) Int64x8
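The integer Mul methods map to VPMULLW/VPMULLD/VPMULLQ (the former MulLow), so each lane keeps only the low half of the double-width product. A one-lane scalar sketch of that truncating behaviour, for illustration only:

// mulLowInt16 models one lane of Int16x8.Mul: the full 32-bit product is
// truncated to its low 16 bits, the same wrap-around behaviour as Go's own
// int16 multiplication.
func mulLowInt16(a, b int16) int16 {
	wide := int32(a) * int32(b)
	return int16(wide)
}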
/* MulEvenWiden */
// Asm: VPMULHUW, CPU Feature: AVX512BW
func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32
-/* MulLow */
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLW, CPU Feature: AVX
-func (x Int16x8) MulLow(y Int16x8) Int16x8
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLW, CPU Feature: AVX2
-func (x Int16x16) MulLow(y Int16x16) Int16x16
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x32) MulLow(y Int16x32) Int16x32
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLD, CPU Feature: AVX
-func (x Int32x4) MulLow(y Int32x4) Int32x4
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLD, CPU Feature: AVX2
-func (x Int32x8) MulLow(y Int32x8) Int32x8
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x16) MulLow(y Int32x16) Int32x16
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x2) MulLow(y Int64x2) Int64x2
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x4) MulLow(y Int64x4) Int64x4
-
-// MulLow multiplies elements and stores the low part of the result.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x8) MulLow(y Int64x8) Int64x8
-
-/* MulLowMasked */
+/* MulMasked */
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x8) MulLowMasked(y Int16x8, mask Mask16x8) Int16x8
+// Asm: VMULPS, CPU Feature: AVX512F
+func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x16) MulLowMasked(y Int16x16, mask Mask16x16) Int16x16
+// Asm: VMULPS, CPU Feature: AVX512F
+func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLW, CPU Feature: AVX512BW
-func (x Int16x32) MulLowMasked(y Int16x32, mask Mask16x32) Int16x32
+// Asm: VMULPS, CPU Feature: AVX512F
+func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x4) MulLowMasked(y Int32x4, mask Mask32x4) Int32x4
+// Asm: VMULPD, CPU Feature: AVX512F
+func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x8) MulLowMasked(y Int32x8, mask Mask32x8) Int32x8
+// Asm: VMULPD, CPU Feature: AVX512F
+func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLD, CPU Feature: AVX512F
-func (x Int32x16) MulLowMasked(y Int32x16, mask Mask32x16) Int32x16
+// Asm: VMULPD, CPU Feature: AVX512F
+func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x2) MulLowMasked(y Int64x2, mask Mask64x2) Int64x2
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Int16x8) MulMasked(y Int16x8, mask Mask16x8) Int16x8
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x4) MulLowMasked(y Int64x4, mask Mask64x4) Int64x4
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Int16x16) MulMasked(y Int16x16, mask Mask16x16) Int16x16
-// MulLowMasked multiplies elements and stores the low part of the result.
+// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULLQ, CPU Feature: AVX512DQ
-func (x Int64x8) MulLowMasked(y Int64x8, mask Mask64x8) Int64x8
-
-/* MulMasked */
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Int16x32) MulMasked(y Int16x32, mask Mask16x32) Int16x32
// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Int32x4) MulMasked(y Int32x4, mask Mask32x4) Int32x4
// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Int32x8) MulMasked(y Int32x8, mask Mask32x8) Int32x8
// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VMULPS, CPU Feature: AVX512F
-func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Int32x16) MulMasked(y Int32x16, mask Mask32x16) Int32x16
// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Int64x2) MulMasked(y Int64x2, mask Mask64x2) Int64x2
// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Int64x4) MulMasked(y Int64x4, mask Mask64x4) Int64x4
// MulMasked multiplies corresponding elements of two vectors.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VMULPD, CPU Feature: AVX512F
-func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Int64x8) MulMasked(y Int64x8, mask Mask64x8) Int64x8
/* NotEqual */
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512F
-func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* PairDotProd */
-
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX
-func (x Int16x8) PairDotProd(y Int16x8) Int32x4
-
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX2
-func (x Int16x16) PairDotProd(y Int16x16) Int32x8
-
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x32) PairDotProd(y Int16x32) Int32x16
-
-/* PairDotProdMasked */
-
-// PairDotProdMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4
-
-// PairDotProdMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8
-
-// PairDotProdMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16
-
-/* PairwiseAdd */
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x4) PairwiseAdd(y Float32x4) Float32x4
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x8) PairwiseAdd(y Float32x8) Float32x8
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x2) PairwiseAdd(y Float64x2) Float64x2
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x4) PairwiseAdd(y Float64x4) Float64x4
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Int16x8) PairwiseAdd(y Int16x8) Int16x8
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Int16x16) PairwiseAdd(y Int16x16) Int16x16
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Int32x4) PairwiseAdd(y Int32x4) Int32x4
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Int32x8) PairwiseAdd(y Int32x8) Int32x8
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Uint16x8) PairwiseAdd(y Uint16x8) Uint16x8
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Uint16x16) PairwiseAdd(y Uint16x16) Uint16x16
+// Asm: VPORQ, CPU Feature: AVX512F
+func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// OrMasked performs a bitwise OR operation between two vectors.
//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Uint32x4) PairwiseAdd(y Uint32x4) Uint32x4
-
-// PairwiseAdd horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// This operation is applied selectively under a write mask.
//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Uint32x8) PairwiseAdd(y Uint32x8) Uint32x8
-
-/* PairwiseSub */
+// Asm: VPORQ, CPU Feature: AVX512F
+func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// OrMasked performs a bitwise OR operation between two vectors.
//
-// Asm: VHSUBPS, CPU Feature: AVX
-func (x Float32x4) PairwiseSub(y Float32x4) Float32x4
-
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// This operation is applied selectively under a write mask.
//
-// Asm: VHSUBPS, CPU Feature: AVX
-func (x Float32x8) PairwiseSub(y Float32x8) Float32x8
+// Asm: VPORQ, CPU Feature: AVX512F
+func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
-//
-// Asm: VHSUBPD, CPU Feature: AVX
-func (x Float64x2) PairwiseSub(y Float64x2) Float64x2
+/* PairDotProd */
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VHSUBPD, CPU Feature: AVX
-func (x Float64x4) PairwiseSub(y Float64x4) Float64x4
+// Asm: VPMADDWD, CPU Feature: AVX
+func (x Int16x8) PairDotProd(y Int16x8) Int32x4
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPHSUBW, CPU Feature: AVX
-func (x Int16x8) PairwiseSub(y Int16x8) Int16x8
+// Asm: VPMADDWD, CPU Feature: AVX2
+func (x Int16x16) PairDotProd(y Int16x16) Int32x8
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// PairDotProd multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPHSUBW, CPU Feature: AVX2
-func (x Int16x16) PairwiseSub(y Int16x16) Int16x16
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x32) PairDotProd(y Int16x32) Int32x16
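
// A scalar reference sketch for the PairDotProd semantics documented above
// (an illustration only; pairDotProdRef is not part of the package): each
// int32 result lane is the sum of the two products of the corresponding
// adjacent int16 lanes of x and y, which is what VPMADDWD computes.
func pairDotProdRef(x, y []int16) []int32 {
	out := make([]int32, len(x)/2)
	for i := range out {
		out[i] = int32(x[2*i])*int32(y[2*i]) + int32(x[2*i+1])*int32(y[2*i+1])
	}
	return out
}
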
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
-//
-// Asm: VPHSUBD, CPU Feature: AVX
-func (x Int32x4) PairwiseSub(y Int32x4) Int32x4
+/* PairDotProdMasked */
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// PairDotProdMasked multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPHSUBD, CPU Feature: AVX2
-func (x Int32x8) PairwiseSub(y Int32x8) Int32x8
-
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// This operation is applied selectively under a write mask.
//
-// Asm: VPHSUBW, CPU Feature: AVX
-func (x Uint16x8) PairwiseSub(y Uint16x8) Uint16x8
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// PairDotProdMasked multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPHSUBW, CPU Feature: AVX2
-func (x Uint16x16) PairwiseSub(y Uint16x16) Uint16x16
-
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// This operation is applied selectively under a write mask.
//
-// Asm: VPHSUBD, CPU Feature: AVX
-func (x Uint32x4) PairwiseSub(y Uint32x4) Uint32x4
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8
-// PairwiseSub horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// PairDotProdMasked multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
//
-// Asm: VPHSUBD, CPU Feature: AVX2
-func (x Uint32x8) PairwiseSub(y Uint32x8) Uint32x8
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16
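
// The Masked variants in this file all follow the same pattern: the write mask
// selects which result lanes the operation produces. As a sketch only — the
// comments do not state whether unselected lanes are zeroed or merged, so the
// zeroing shown below is an assumption to verify against the package docs, and
// maskedOpRef is purely illustrative, not part of the package.
func maskedOpRef(op func(a, b int16) int32, x, y []int16, mask []bool) []int32 {
	out := make([]int32, len(mask))
	for i, m := range mask {
		if m {
			out[i] = op(x[i], y[i]) // selected lane: apply the operation
		} // unselected lane: assumed to stay zero
	}
	return out
}
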
/* Permute */
// Asm: VPRORVD, CPU Feature: AVX512F
func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512F
-func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512F
-func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* Round */
-
-// Round rounds elements to the nearest integer.
-//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x4) Round() Float32x4
-
-// Round rounds elements to the nearest integer.
-//
-// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x8) Round() Float32x8
-
-// Round rounds elements to the nearest integer.
-//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x2) Round() Float64x2
-
-// Round rounds elements to the nearest integer.
-//
-// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x4) Round() Float64x4
-
-/* RoundWithPrecision */
-
-// RoundWithPrecision rounds elements with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) RoundWithPrecision(prec uint8) Float32x4
-
-// RoundWithPrecision rounds elements with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) RoundWithPrecision(prec uint8) Float32x8
-
-// RoundWithPrecision rounds elements with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) RoundWithPrecision(prec uint8) Float32x16
-
-// RoundWithPrecision rounds elements with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) RoundWithPrecision(prec uint8) Float64x2
-
-// RoundWithPrecision rounds elements with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) RoundWithPrecision(prec uint8) Float64x4
-
-// RoundWithPrecision rounds elements with specified precision.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) RoundWithPrecision(prec uint8) Float64x8
-
-/* RoundWithPrecisionMasked */
-
-// RoundWithPrecisionMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) RoundWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
-
-// RoundWithPrecisionMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) RoundWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
-
-// RoundWithPrecisionMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) RoundWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
-
-// RoundWithPrecisionMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) RoundWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
-
-// RoundWithPrecisionMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) RoundWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
-
-// RoundWithPrecisionMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec is expected to be a constant, non-constant value will trigger a runtime panic.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) RoundWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* SaturatedAdd */
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX
-func (x Int8x16) SaturatedAdd(y Int8x16) Int8x16
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX2
-func (x Int8x32) SaturatedAdd(y Int8x32) Int8x32
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedAdd(y Int8x64) Int8x64
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX
-func (x Int16x8) SaturatedAdd(y Int16x8) Int16x8
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedAdd(y Int16x16) Int16x16
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedAdd(y Int16x32) Int16x32
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX
-func (x Uint8x16) SaturatedAdd(y Uint8x16) Uint8x16
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX2
-func (x Uint8x32) SaturatedAdd(y Uint8x32) Uint8x32
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedAdd(y Uint8x64) Uint8x64
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX
-func (x Uint16x8) SaturatedAdd(y Uint16x8) Uint16x8
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX2
-func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16
-
-// SaturatedAdd adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32
-
-/* SaturatedAddDotProd */
-
-// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
-func (x Int32x4) SaturatedAddDotProd(y Int16x8, z Int16x8) Int32x4
-
-// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
-func (x Int32x8) SaturatedAddDotProd(y Int16x16, z Int16x16) Int32x8
-
-// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
+// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedAddDotProd(y Int16x32, z Int16x32) Int32x16
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPRORVD, CPU Feature: AVX512F
+func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
-/* SaturatedAddDotProdMasked */
+// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPRORVD, CPU Feature: AVX512F
+func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
-// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
+// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) SaturatedAddDotProdMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
+// Asm: VPRORVQ, CPU Feature: AVX512F
+func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
-// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
+// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) SaturatedAddDotProdMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
+// Asm: VPRORVQ, CPU Feature: AVX512F
+func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
-// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
+// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedAddDotProdMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
+// Asm: VPRORVQ, CPU Feature: AVX512F
+func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
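
// Per-lane sketch of the rotate step above, with write-mask handling set
// aside: each 64-bit lane of x is rotated right by the count in the matching
// lane of y, which math/bits expresses directly (rotateRightLane is an
// illustration, not part of the package).

import "math/bits"

func rotateRightLane(x, y uint64) uint64 {
	return bits.RotateLeft64(x, -int(y&63)) // rotate right by y mod 64
}
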
-/* SaturatedAddMasked */
+/* Round */
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// Round rounds elements to the nearest integer.
//
-// This operation is applied selectively under a write mask.
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Round() Float32x4
+
+// Round rounds elements to the nearest integer.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x16) SaturatedAddMasked(y Int8x16, mask Mask8x16) Int8x16
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Round() Float32x8
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// Round rounds elements to the nearest integer.
//
-// This operation is applied selectively under a write mask.
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Round() Float64x2
+
+// Round rounds elements to the nearest integer.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x32) SaturatedAddMasked(y Int8x32, mask Mask8x32) Int8x32
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Round() Float64x4
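
// Sketch of Round's per-lane behavior, assuming the default x86 nearest-even
// rounding mode is what the wrapper selects: ties resolve to the even integer,
// so math.RoundToEven, not math.Round (ties away from zero), is the matching
// scalar function. roundLane is illustrative only.

import "math"

func roundLane(x float64) float64 {
	return math.RoundToEven(x) // e.g. roundLane(2.5) == 2, roundLane(3.5) == 4
}
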
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+/* RoundScaled */
+
+// RoundScaled rounds elements with specified precision.
//
-// This operation is applied selectively under a write mask.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedAddMasked(y Int8x64, mask Mask8x64) Int8x64
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x4) RoundScaled(prec uint8) Float32x4
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaled rounds elements with specified precision.
//
-// This operation is applied selectively under a write mask.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x8) SaturatedAddMasked(y Int16x8, mask Mask16x8) Int16x8
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x8) RoundScaled(prec uint8) Float32x8
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaled rounds elements with specified precision.
//
-// This operation is applied selectively under a write mask.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x16) SaturatedAddMasked(y Int16x16, mask Mask16x16) Int16x16
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x16) RoundScaled(prec uint8) Float32x16
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaled rounds elements with specified precision.
//
-// This operation is applied selectively under a write mask.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedAddMasked(y Int16x32, mask Mask16x32) Int16x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x2) RoundScaled(prec uint8) Float64x2
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaled rounds elements with specified precision.
//
-// This operation is applied selectively under a write mask.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedAddMasked(y Uint8x16, mask Mask8x16) Uint8x16
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x4) RoundScaled(prec uint8) Float64x4
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaled rounds elements with specified precision.
//
-// This operation is applied selectively under a write mask.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedAddMasked(y Uint8x32, mask Mask8x32) Uint8x32
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x8) RoundScaled(prec uint8) Float64x8
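
// A scalar sketch of what "rounds elements with specified precision" means
// here, assuming the VRNDSCALE formulation of keeping prec binary fraction
// digits in nearest-even mode: scale up by 2**prec, round, scale back down.
// roundScaledRef is an illustration, not part of the package.

import "math"

func roundScaledRef(x float64, prec uint8) float64 {
	s := math.Ldexp(1, int(prec)) // 2**prec, exact in float64
	return math.RoundToEven(x*s) / s
}
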
+
+/* RoundScaledMasked */
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedAddMasked(y Uint8x64, mask Mask8x64) Uint8x64
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x4) RoundScaledMasked(prec uint8, mask Mask32x4) Float32x4
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x8) SaturatedAddMasked(y Uint16x8, mask Mask16x8) Uint16x8
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x8) RoundScaledMasked(prec uint8, mask Mask32x8) Float32x8
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x16) SaturatedAddMasked(y Uint16x16, mask Mask16x16) Uint16x16
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPS, CPU Feature: AVX512F
+func (x Float32x16) RoundScaledMasked(prec uint8, mask Mask32x16) Float32x16
-// SaturatedAddMasked adds corresponding elements of two vectors with saturation.
+// RoundScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedAddMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-/* SaturatedPairwiseAdd */
-
-// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPHADDSW, CPU Feature: AVX
-func (x Int16x8) SaturatedPairwiseAdd(y Int16x8) Int16x8
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x2) RoundScaledMasked(prec uint8, mask Mask64x2) Float64x2
-// SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// RoundScaledMasked rounds elements with specified precision.
//
-// Asm: VPHADDSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedPairwiseAdd(y Int16x16) Int16x16
-
-/* SaturatedPairwiseSub */
-
-// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// This operation is applied selectively under a write mask.
//
-// Asm: VPHSUBSW, CPU Feature: AVX
-func (x Int16x8) SaturatedPairwiseSub(y Int16x8) Int16x8
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x4) RoundScaledMasked(prec uint8, mask Mask64x4) Float64x4
-// SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// RoundScaledMasked rounds elements with specified precision.
//
-// Asm: VPHSUBSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedPairwiseSub(y Int16x16) Int16x16
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VRNDSCALEPD, CPU Feature: AVX512F
+func (x Float64x8) RoundScaledMasked(prec uint8, mask Mask64x8) Float64x8
-/* SaturatedSub */
+/* RoundScaledResidue */
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidue computes the difference after rounding with specified precision.
//
-// Asm: VPSUBSB, CPU Feature: AVX
-func (x Int8x16) SaturatedSub(y Int8x16) Int8x16
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPSUBSB, CPU Feature: AVX2
-func (x Int8x32) SaturatedSub(y Int8x32) Int8x32
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) RoundScaledResidue(prec uint8) Float32x4
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidue computes the difference after rounding with specified precision.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedSub(y Int8x64) Int8x64
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPSUBSW, CPU Feature: AVX
-func (x Int16x8) SaturatedSub(y Int16x8) Int16x8
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) RoundScaledResidue(prec uint8) Float32x8
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidue computes the difference after rounding with specified precision.
//
-// Asm: VPSUBSW, CPU Feature: AVX2
-func (x Int16x16) SaturatedSub(y Int16x16) Int16x16
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedSub(y Int16x32) Int16x32
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) RoundScaledResidue(prec uint8) Float32x16
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidue computes the difference after rounding with specified precision.
//
-// Asm: VPSUBSB, CPU Feature: AVX
-func (x Uint8x16) SaturatedSub(y Uint8x16) Uint8x16
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPSUBSB, CPU Feature: AVX2
-func (x Uint8x32) SaturatedSub(y Uint8x32) Uint8x32
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) RoundScaledResidue(prec uint8) Float64x2
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidue computes the difference after rounding with specified precision.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedSub(y Uint8x64) Uint8x64
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPSUBSW, CPU Feature: AVX
-func (x Uint16x8) SaturatedSub(y Uint16x8) Uint16x8
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) RoundScaledResidue(prec uint8) Float64x4
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidue computes the difference after rounding with specified precision.
//
-// Asm: VPSUBSW, CPU Feature: AVX2
-func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16
-
-// SaturatedSub subtracts corresponding elements of two vectors with saturation.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) RoundScaledResidue(prec uint8) Float64x8
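
// Sketch of the relationship RoundScaledResidue is documented to compute: the
// part of x that rounding at the given precision discards, so, assuming the
// VREDUCE formulation, x == RoundScaled(x, prec) + RoundScaledResidue(x, prec).
// roundScaledResidueRef is illustrative only.

import "math"

func roundScaledResidueRef(x float64, prec uint8) float64 {
	s := math.Ldexp(1, int(prec))
	return x - math.RoundToEven(x*s)/s // what rounding removed
}
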
-/* SaturatedSubMasked */
+/* RoundScaledResidueMasked */
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x16) SaturatedSubMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x32) SaturatedSubMasked(y Int8x32, mask Mask8x32) Int8x32
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) RoundScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Int8x64) SaturatedSubMasked(y Int8x64, mask Mask8x64) Int8x64
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) RoundScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x8) SaturatedSubMasked(y Int16x8, mask Mask16x8) Int16x8
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) RoundScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x16) SaturatedSubMasked(y Int16x16, mask Mask16x16) Int16x16
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) RoundScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Int16x32) SaturatedSubMasked(y Int16x32, mask Mask16x32) Int16x32
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) RoundScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// RoundScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedSubMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
-// This operation is applied selectively under a write mask.
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) RoundScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
+
+/* SaturatedAddDotProd */
+
+// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedSubMasked(y Uint8x32, mask Mask8x32) Uint8x32
+// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
+func (x Int32x4) SaturatedAddDotProd(y Int16x8, z Int16x8) Int32x4
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
//
-// This operation is applied selectively under a write mask.
+// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
+func (x Int32x8) SaturatedAddDotProd(y Int16x16, z Int16x16) Int32x8
+
+// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedSubMasked(y Uint8x64, mask Mask8x64) Uint8x64
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x16) SaturatedAddDotProd(y Int16x32, z Int16x32) Int32x16
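
// Scalar sketch of one lane of SaturatedAddDotProd (VPDPWSSDS), as an
// illustration only: the two int16 products from y and z are summed and added
// to the int32 lane of x, with the final accumulate saturating to the int32
// range. satAddDotProdLane is not part of the package.

import "math"

func satAddDotProdLane(acc int32, y0, y1, z0, z1 int16) int32 {
	sum := int64(acc) + int64(y0)*int64(z0) + int64(y1)*int64(z1)
	if sum > math.MaxInt32 {
		return math.MaxInt32
	}
	if sum < math.MinInt32 {
		return math.MinInt32
	}
	return int32(sum)
}
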
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+/* SaturatedAddDotProdMasked */
+
+// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x8) SaturatedSubMasked(y Uint16x8, mask Mask16x8) Uint16x8
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x4) SaturatedAddDotProdMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x16) SaturatedSubMasked(y Uint16x16, mask Mask16x16) Uint16x16
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x8) SaturatedAddDotProdMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
-// SaturatedSubMasked subtracts corresponding elements of two vectors with saturation.
+// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
-func (x Uint16x32) SaturatedSubMasked(y Uint16x32, mask Mask16x32) Uint16x32
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x16) SaturatedAddDotProdMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
/* SaturatedUnsignedSignedPairDotProd */
// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
+// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
+func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
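
// Scalar sketch of one lane of the quad dot product above (VPDPBUSDS), as an
// illustration only: four signed-by-unsigned byte products are summed and
// added to the matching int32 lane of z, saturating to the int32 range.
// satQuadDotProdLane is not part of the package.

import "math"

func satQuadDotProdLane(x [4]int8, y [4]uint8, z int32) int32 {
	sum := int64(z)
	for i := range x {
		sum += int64(x[i]) * int64(y[i])
	}
	if sum > math.MaxInt32 {
		return math.MaxInt32
	}
	if sum < math.MinInt32 {
		return math.MinInt32
	}
	return int32(sum)
}
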
+
+/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */
+
+// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
+
+/* Scale */
+
+// Scale multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512F
+func (x Float32x4) Scale(y Float32x4) Float32x4
+
+// Scale multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512F
+func (x Float32x8) Scale(y Float32x8) Float32x8
+
+// Scale multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512F
+func (x Float32x16) Scale(y Float32x16) Float32x16
+
+// Scale multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPD, CPU Feature: AVX512F
+func (x Float64x2) Scale(y Float64x2) Float64x2
+
+// Scale multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPD, CPU Feature: AVX512F
+func (x Float64x4) Scale(y Float64x4) Float64x4
+
+// Scale multiplies elements by a power of 2.
+//
+// Asm: VSCALEFPD, CPU Feature: AVX512F
+func (x Float64x8) Scale(y Float64x8) Float64x8
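
// Per-lane sketch of Scale (VSCALEF), as an illustration only: each lane of x
// is multiplied by 2 raised to floor(y) for the matching lane of y; handling
// of NaN, infinities, and overflow is left to the instruction and omitted
// here. scaleLane is not part of the package.

import "math"

func scaleLane(x, y float64) float64 {
	return x * math.Exp2(math.Floor(y))
}
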
+
+/* ScaleMasked */
+
+// ScaleMasked multiplies elements by a power of 2.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512F
+func (x Float32x4) ScaleMasked(y Float32x4, mask Mask32x4) Float32x4
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+// ScaleMasked multiplies elements by a power of 2.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+// This operation is applied selectively under a write mask.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512F
+func (x Float32x8) ScaleMasked(y Float32x8, mask Mask32x8) Float32x8
-/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */
+// ScaleMasked multiplies elements by a power of 2.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VSCALEFPS, CPU Feature: AVX512F
+func (x Float32x16) ScaleMasked(y Float32x16, mask Mask32x16) Float32x16
-// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+// ScaleMasked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
+// Asm: VSCALEFPD, CPU Feature: AVX512F
+func (x Float64x2) ScaleMasked(y Float64x2, mask Mask64x2) Float64x2
-// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+// ScaleMasked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
+// Asm: VSCALEFPD, CPU Feature: AVX512F
+func (x Float64x4) ScaleMasked(y Float64x4, mask Mask64x4) Float64x4
-// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+// ScaleMasked multiplies elements by a power of 2.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
+// Asm: VSCALEFPD, CPU Feature: AVX512F
+func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8
/* Set128 */
// Asm: VPSUBQ, CPU Feature: AVX512F
func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8
+/* SubPairs */
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPS, CPU Feature: AVX
+func (x Float32x4) SubPairs(y Float32x4) Float32x4
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPS, CPU Feature: AVX
+func (x Float32x8) SubPairs(y Float32x8) Float32x8
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPD, CPU Feature: AVX
+func (x Float64x2) SubPairs(y Float64x2) Float64x2
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VHSUBPD, CPU Feature: AVX
+func (x Float64x4) SubPairs(y Float64x4) Float64x4
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Int16x8) SubPairs(y Int16x8) Int16x8
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX2
+func (x Int16x16) SubPairs(y Int16x16) Int16x16
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX
+func (x Int32x4) SubPairs(y Int32x4) Int32x4
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX2
+func (x Int32x8) SubPairs(y Int32x8) Int32x8
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX2
+func (x Uint16x16) SubPairs(y Uint16x16) Uint16x16
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX
+func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4
+
+// SubPairs horizontally subtracts adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBD, CPU Feature: AVX2
+func (x Uint32x8) SubPairs(y Uint32x8) Uint32x8
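
// Scalar sketch of the pairwise step SubPairs applies to each input, as an
// illustration only: adjacent lanes are subtracted (even lane minus odd lane),
// and the results from y and x are then packed in the order the comments above
// describe. subAdjacentPairs is not part of the package.
func subAdjacentPairs(v []int32) []int32 {
	out := make([]int32, len(v)/2)
	for i := range out {
		out[i] = v[2*i] - v[2*i+1]
	}
	return out
}
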
+
+/* SubPairsSaturated */
+
+// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBSW, CPU Feature: AVX
+func (x Int16x8) SubPairsSaturated(y Int16x8) Int16x8
+
+// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+//
+// Asm: VPHSUBSW, CPU Feature: AVX2
+func (x Int16x16) SubPairsSaturated(y Int16x16) Int16x16
+
+/* SubSaturated */
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX
+func (x Int8x16) SubSaturated(y Int8x16) Int8x16
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX2
+func (x Int8x32) SubSaturated(y Int8x32) Int8x32
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Int8x64) SubSaturated(y Int8x64) Int8x64
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX
+func (x Int16x8) SubSaturated(y Int16x8) Int16x8
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX2
+func (x Int16x16) SubSaturated(y Int16x16) Int16x16
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Int16x32) SubSaturated(y Int16x32) Int16x32
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX
+func (x Uint8x16) SubSaturated(y Uint8x16) Uint8x16
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX2
+func (x Uint8x32) SubSaturated(y Uint8x32) Uint8x32
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Uint8x64) SubSaturated(y Uint8x64) Uint8x64
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX
+func (x Uint16x8) SubSaturated(y Uint16x8) Uint16x8
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX2
+func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16
+
+// SubSaturated subtracts corresponding elements of two vectors with saturation.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32
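
// Scalar sketch of one saturating lane subtraction as performed by
// SubSaturated above, shown for the signed int8 case; the other element widths
// follow the same clamp-to-range pattern. subSat8 is illustrative only.

import "math"

func subSat8(a, b int8) int8 {
	d := int(a) - int(b)
	if d > math.MaxInt8 {
		return math.MaxInt8
	}
	if d < math.MinInt8 {
		return math.MinInt8
	}
	return int8(d)
}
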
+
+/* SubSaturatedMasked */
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Int8x16) SubSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Int8x32) SubSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Int8x64) SubSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Int16x8) SubSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Int16x16) SubSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Int16x32) SubSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Uint8x16) SubSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Uint8x32) SubSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSB, CPU Feature: AVX512BW
+func (x Uint8x64) SubSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Uint16x8) SubSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Uint16x16) SubSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
+
+// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPSUBSW, CPU Feature: AVX512BW
+func (x Uint16x32) SubSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
+
/* Trunc */
// Trunc truncates elements towards zero.
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) Trunc() Float64x4
-/* TruncWithPrecision */
+/* TruncScaled */
-// TruncWithPrecision truncates elements with specified precision.
+// TruncScaled truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) TruncWithPrecision(prec uint8) Float32x4
+func (x Float32x4) TruncScaled(prec uint8) Float32x4
-// TruncWithPrecision truncates elements with specified precision.
+// TruncScaled truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) TruncWithPrecision(prec uint8) Float32x8
+func (x Float32x8) TruncScaled(prec uint8) Float32x8
-// TruncWithPrecision truncates elements with specified precision.
+// TruncScaled truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) TruncWithPrecision(prec uint8) Float32x16
+func (x Float32x16) TruncScaled(prec uint8) Float32x16
-// TruncWithPrecision truncates elements with specified precision.
+// TruncScaled truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) TruncWithPrecision(prec uint8) Float64x2
+func (x Float64x2) TruncScaled(prec uint8) Float64x2
-// TruncWithPrecision truncates elements with specified precision.
+// TruncScaled truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) TruncWithPrecision(prec uint8) Float64x4
+func (x Float64x4) TruncScaled(prec uint8) Float64x4
-// TruncWithPrecision truncates elements with specified precision.
+// TruncScaled truncates elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) TruncWithPrecision(prec uint8) Float64x8
+func (x Float64x8) TruncScaled(prec uint8) Float64x8
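
// Scalar sketch of TruncScaled, assuming the VRNDSCALE truncate-toward-zero
// formulation: keep prec binary fraction digits by scaling up by 2**prec,
// truncating, and scaling back down. truncScaledRef is illustrative only; the
// TruncScaledResidue methods further below return what this step discards.

import "math"

func truncScaledRef(x float64, prec uint8) float64 {
	s := math.Ldexp(1, int(prec)) // 2**prec
	return math.Trunc(x*s) / s
}
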
-/* TruncWithPrecisionMasked */
+/* TruncScaledMasked */
-// TruncWithPrecisionMasked truncates elements with specified precision.
+// TruncScaledMasked truncates elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) TruncWithPrecisionMasked(prec uint8, mask Mask32x4) Float32x4
+func (x Float32x4) TruncScaledMasked(prec uint8, mask Mask32x4) Float32x4
-// TruncWithPrecisionMasked truncates elements with specified precision.
+// TruncScaledMasked truncates elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) TruncWithPrecisionMasked(prec uint8, mask Mask32x8) Float32x8
+func (x Float32x8) TruncScaledMasked(prec uint8, mask Mask32x8) Float32x8
-// TruncWithPrecisionMasked truncates elements with specified precision.
+// TruncScaledMasked truncates elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) TruncWithPrecisionMasked(prec uint8, mask Mask32x16) Float32x16
+func (x Float32x16) TruncScaledMasked(prec uint8, mask Mask32x16) Float32x16
-// TruncWithPrecisionMasked truncates elements with specified precision.
+// TruncScaledMasked truncates elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) TruncWithPrecisionMasked(prec uint8, mask Mask64x2) Float64x2
+func (x Float64x2) TruncScaledMasked(prec uint8, mask Mask64x2) Float64x2
-// TruncWithPrecisionMasked truncates elements with specified precision.
+// TruncScaledMasked truncates elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) TruncWithPrecisionMasked(prec uint8, mask Mask64x4) Float64x4
+func (x Float64x4) TruncScaledMasked(prec uint8, mask Mask64x4) Float64x4
-// TruncWithPrecisionMasked truncates elements with specified precision.
+// TruncScaledMasked truncates elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) TruncWithPrecisionMasked(prec uint8, mask Mask64x8) Float64x8
+func (x Float64x8) TruncScaledMasked(prec uint8, mask Mask64x8) Float64x8
+
+/* TruncScaledResidue */
+
+// TruncScaledResidue computes the difference after truncating with specified precision.
+//
+// prec is expected to be a constant, non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) TruncScaledResidue(prec uint8) Float32x4
+
+// TruncScaledResidue computes the difference after truncating with specified precision.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) TruncScaledResidue(prec uint8) Float32x8
+
+// TruncScaledResidue computes the difference after truncating with specified precision.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) TruncScaledResidue(prec uint8) Float32x16
+
+// TruncScaledResidue computes the difference after truncating with specified precision.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) TruncScaledResidue(prec uint8) Float64x2
+
+// TruncScaledResidue computes the difference after truncating with specified precision.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4
+
+// TruncScaledResidue computes the difference after truncating with specified precision.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8
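
// Illustrative usage sketch, assuming the simd API declared in this change;
// truncScaledResidueDemo is a hypothetical name. For a given prec, the
// residue is expected to equal what truncation discards, roughly
// x.Sub(x.TruncScaled(prec)), mirroring the CeilScaledResidue test below.
func truncScaledResidueDemo(x simd.Float64x8) (residue, viaSub simd.Float64x8) {
	residue = x.TruncScaledResidue(0) // VREDUCEPD path
	viaSub = x.Sub(x.TruncScaled(0))  // same quantity via explicit subtraction
	return residue, viaSub
}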
+
+/* TruncScaledResidueMasked */
+
+// TruncScaledResidueMasked computes the difference after truncating with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x4) TruncScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
+
+// TruncScaledResidueMasked computes the difference after truncating with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x8) TruncScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
+
+// TruncScaledResidueMasked computes the difference after truncating with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPS, CPU Feature: AVX512DQ
+func (x Float32x16) TruncScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
+
+// TruncScaledResidueMasked computes the difference after truncating with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x2) TruncScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
+
+// TruncScaledResidueMasked computes the difference after truncating with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x4) TruncScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
+
+// TruncScaledResidueMasked computes the difference after truncating with specified precision.
+//
+// This operation is applied selectively under a write mask.
+//
+// prec is expected to be a constant; a non-constant value will trigger a runtime panic.
+//
+// Asm: VREDUCEPD, CPU Feature: AVX512DQ
+func (x Float64x8) TruncScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
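
// Illustrative usage sketch, assuming the simd API declared in this change;
// maskedResidueDemo and its parameters are hypothetical names. The masked
// variant computes the residue only in the lanes selected by m, per the
// write-mask note in the doc comments above.
func maskedResidueDemo(x simd.Float64x8, m simd.Mask64x8) simd.Float64x8 {
	return x.TruncScaledResidueMasked(0, m) // residue under the write mask m
}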
/* UnsignedSignedQuadDotProdAccumulate */
testFloat32x8UnaryToInt32(t, simd.Float32x8.ConvertToInt32, toInt32Slice[float32])
}
-func TestDiffWithCeilWithPrecision(t *testing.T) {
+func TestCeilScaledResidue(t *testing.T) {
if !simd.HasAVX512() {
t.Skip("Needs AVX512")
}
testFloat64x8UnaryFlaky(t,
- func(x simd.Float64x8) simd.Float64x8 { return x.DiffWithCeilWithPrecision(0) },
+ func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(0) },
map1(ceilResidueForPrecision[float64](0)),
0.001)
testFloat64x8UnaryFlaky(t,
- func(x simd.Float64x8) simd.Float64x8 { return x.DiffWithCeilWithPrecision(1) },
+ func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(1) },
map1(ceilResidueForPrecision[float64](1)),
0.001)
testFloat64x8Unary(t,
- func(x simd.Float64x8) simd.Float64x8 { return x.Sub(x.CeilWithPrecision(0)) },
+ func(x simd.Float64x8) simd.Float64x8 { return x.Sub(x.CeilScaled(0)) },
map1[float64](func(x float64) float64 { return x - math.Ceil(x) }))
}
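
// Illustrative scalar reference, assuming one plausible reading of the
// Scaled naming: prec selects the number of fraction bits preserved
// (granularity 2^-prec), as with the VRNDSCALE/VREDUCE fixed-point
// immediates. ceilScaledResidueRef and this assumption are hypothetical.
func ceilScaledResidueRef(x float64, prec uint8) float64 {
	s := math.Pow(2, float64(prec)) // scale factor 2^prec
	return x - math.Ceil(x*s)/s     // what CeilScaled(prec) would discard
}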