ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512,
- ssa.OpAMD64VRCPPS128,
- ssa.OpAMD64VRCPPS256,
- ssa.OpAMD64VRCP14PS512,
- ssa.OpAMD64VRCP14PD128,
- ssa.OpAMD64VRCP14PD256,
- ssa.OpAMD64VRCP14PD512,
- ssa.OpAMD64VRSQRTPS128,
- ssa.OpAMD64VRSQRTPS256,
- ssa.OpAMD64VRSQRT14PS512,
- ssa.OpAMD64VRSQRT14PD128,
- ssa.OpAMD64VRSQRT14PD256,
- ssa.OpAMD64VRSQRT14PD512,
ssa.OpAMD64VCVTTPS2DQ128,
ssa.OpAMD64VCVTTPS2DQ256,
ssa.OpAMD64VCVTTPS2DQ512,
ssa.OpAMD64VPOPCNTQ128,
ssa.OpAMD64VPOPCNTQ256,
ssa.OpAMD64VPOPCNTQ512,
+ ssa.OpAMD64VRCPPS128,
+ ssa.OpAMD64VRCPPS256,
+ ssa.OpAMD64VRCP14PS512,
+ ssa.OpAMD64VRCP14PD128,
+ ssa.OpAMD64VRCP14PD256,
+ ssa.OpAMD64VRCP14PD512,
+ ssa.OpAMD64VRSQRTPS128,
+ ssa.OpAMD64VRSQRTPS256,
+ ssa.OpAMD64VRSQRT14PS512,
+ ssa.OpAMD64VRSQRT14PD128,
+ ssa.OpAMD64VRSQRT14PD256,
+ ssa.OpAMD64VRSQRT14PD512,
ssa.OpAMD64VSQRTPS128,
ssa.OpAMD64VSQRTPS256,
ssa.OpAMD64VSQRTPS512,
ssa.OpAMD64VPADDSW128,
ssa.OpAMD64VPADDSW256,
ssa.OpAMD64VPADDSW512,
+ ssa.OpAMD64VPADDUSB128,
+ ssa.OpAMD64VPADDUSB256,
+ ssa.OpAMD64VPADDUSB512,
+ ssa.OpAMD64VPADDUSW128,
+ ssa.OpAMD64VPADDUSW256,
+ ssa.OpAMD64VPADDUSW512,
ssa.OpAMD64VADDSUBPS128,
ssa.OpAMD64VADDSUBPS256,
ssa.OpAMD64VADDSUBPD128,
ssa.OpAMD64VPAVGW128,
ssa.OpAMD64VPAVGW256,
ssa.OpAMD64VPAVGW512,
+ ssa.OpAMD64VPSIGNB128,
+ ssa.OpAMD64VPSIGNB256,
+ ssa.OpAMD64VPSIGNW128,
+ ssa.OpAMD64VPSIGNW256,
+ ssa.OpAMD64VPSIGND128,
+ ssa.OpAMD64VPSIGND256,
ssa.OpAMD64VDIVPS128,
ssa.OpAMD64VDIVPS256,
ssa.OpAMD64VDIVPS512,
ssa.OpAMD64VDIVPD128,
ssa.OpAMD64VDIVPD256,
ssa.OpAMD64VDIVPD512,
+ ssa.OpAMD64VPMADDWD128,
+ ssa.OpAMD64VPMADDWD256,
+ ssa.OpAMD64VPMADDWD512,
+ ssa.OpAMD64VPMADDUBSW128,
+ ssa.OpAMD64VPMADDUBSW256,
+ ssa.OpAMD64VPMADDUBSW512,
ssa.OpAMD64VPCMPEQB128,
ssa.OpAMD64VPCMPEQB256,
ssa.OpAMD64VPCMPEQW128,
ssa.OpAMD64VPMULLQ512,
ssa.OpAMD64VPMULDQ128,
ssa.OpAMD64VPMULDQ256,
- ssa.OpAMD64VPMULDQ512,
ssa.OpAMD64VPMULUDQ128,
ssa.OpAMD64VPMULUDQ256,
- ssa.OpAMD64VPMULUDQ512,
ssa.OpAMD64VPMULHW128,
ssa.OpAMD64VPMULHW256,
ssa.OpAMD64VPMULHW512,
ssa.OpAMD64VPMULHUW128,
ssa.OpAMD64VPMULHUW256,
ssa.OpAMD64VPMULHUW512,
ssa.OpAMD64VPOR128,
ssa.OpAMD64VPOR256,
ssa.OpAMD64VPORD512,
ssa.OpAMD64VPORQ512,
- ssa.OpAMD64VPMADDWD128,
- ssa.OpAMD64VPMADDWD256,
- ssa.OpAMD64VPMADDWD512,
ssa.OpAMD64VPERMB128,
ssa.OpAMD64VPERMB256,
ssa.OpAMD64VPERMB512,
ssa.OpAMD64VPRORVQ128,
ssa.OpAMD64VPRORVQ256,
ssa.OpAMD64VPRORVQ512,
- ssa.OpAMD64VPMADDUBSW128,
- ssa.OpAMD64VPMADDUBSW256,
- ssa.OpAMD64VPMADDUBSW512,
ssa.OpAMD64VSCALEFPS128,
ssa.OpAMD64VSCALEFPS256,
ssa.OpAMD64VSCALEFPS512,
ssa.OpAMD64VPSRLVQ128,
ssa.OpAMD64VPSRLVQ256,
ssa.OpAMD64VPSRLVQ512,
- ssa.OpAMD64VPSIGNB128,
- ssa.OpAMD64VPSIGNB256,
- ssa.OpAMD64VPSIGNW128,
- ssa.OpAMD64VPSIGNW256,
- ssa.OpAMD64VPSIGND128,
- ssa.OpAMD64VPSIGND256,
ssa.OpAMD64VSUBPS128,
ssa.OpAMD64VSUBPS256,
ssa.OpAMD64VSUBPS512,
ssa.OpAMD64VPSUBSW128,
ssa.OpAMD64VPSUBSW256,
ssa.OpAMD64VPSUBSW512,
+ ssa.OpAMD64VPSUBUSB128,
+ ssa.OpAMD64VPSUBUSB256,
+ ssa.OpAMD64VPSUBUSB512,
+ ssa.OpAMD64VPSUBUSW128,
+ ssa.OpAMD64VPSUBUSW256,
+ ssa.OpAMD64VPSUBUSW512,
ssa.OpAMD64VPXOR128,
ssa.OpAMD64VPXOR256,
ssa.OpAMD64VPXORD512,
ssa.OpAMD64VPADDSWMasked128,
ssa.OpAMD64VPADDSWMasked256,
ssa.OpAMD64VPADDSWMasked512,
+ ssa.OpAMD64VPADDUSBMasked128,
+ ssa.OpAMD64VPADDUSBMasked256,
+ ssa.OpAMD64VPADDUSBMasked512,
+ ssa.OpAMD64VPADDUSWMasked128,
+ ssa.OpAMD64VPADDUSWMasked256,
+ ssa.OpAMD64VPADDUSWMasked512,
ssa.OpAMD64VPANDDMasked128,
ssa.OpAMD64VPANDDMasked256,
ssa.OpAMD64VPANDDMasked512,
ssa.OpAMD64VDIVPDMasked128,
ssa.OpAMD64VDIVPDMasked256,
ssa.OpAMD64VDIVPDMasked512,
+ ssa.OpAMD64VPMADDWDMasked128,
+ ssa.OpAMD64VPMADDWDMasked256,
+ ssa.OpAMD64VPMADDWDMasked512,
+ ssa.OpAMD64VPMADDUBSWMasked128,
+ ssa.OpAMD64VPMADDUBSWMasked256,
+ ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VGF2P8MULBMasked128,
ssa.OpAMD64VGF2P8MULBMasked256,
ssa.OpAMD64VGF2P8MULBMasked512,
ssa.OpAMD64VPMINUQMasked128,
ssa.OpAMD64VPMINUQMasked256,
ssa.OpAMD64VPMINUQMasked512,
- ssa.OpAMD64VPMULDQMasked128,
- ssa.OpAMD64VPMULDQMasked256,
- ssa.OpAMD64VPMULDQMasked512,
- ssa.OpAMD64VPMULUDQMasked128,
- ssa.OpAMD64VPMULUDQMasked256,
- ssa.OpAMD64VPMULUDQMasked512,
ssa.OpAMD64VPMULHWMasked128,
ssa.OpAMD64VPMULHWMasked256,
ssa.OpAMD64VPMULHWMasked512,
ssa.OpAMD64VPMULHUWMasked128,
ssa.OpAMD64VPMULHUWMasked256,
ssa.OpAMD64VPMULHUWMasked512,
ssa.OpAMD64VMULPSMasked128,
ssa.OpAMD64VMULPSMasked256,
ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512,
- ssa.OpAMD64VPMADDWDMasked128,
- ssa.OpAMD64VPMADDWDMasked256,
- ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPERMBMasked128,
ssa.OpAMD64VPERMBMasked256,
ssa.OpAMD64VPERMBMasked512,
ssa.OpAMD64VPRORVQMasked128,
ssa.OpAMD64VPRORVQMasked256,
ssa.OpAMD64VPRORVQMasked512,
- ssa.OpAMD64VPMADDUBSWMasked128,
- ssa.OpAMD64VPMADDUBSWMasked256,
- ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VSCALEFPSMasked128,
ssa.OpAMD64VSCALEFPSMasked256,
ssa.OpAMD64VSCALEFPSMasked512,
ssa.OpAMD64VPSUBSWMasked128,
ssa.OpAMD64VPSUBSWMasked256,
ssa.OpAMD64VPSUBSWMasked512,
+ ssa.OpAMD64VPSUBUSBMasked128,
+ ssa.OpAMD64VPSUBUSBMasked256,
+ ssa.OpAMD64VPSUBUSBMasked512,
+ ssa.OpAMD64VPSUBUSWMasked128,
+ ssa.OpAMD64VPSUBUSWMasked256,
+ ssa.OpAMD64VPSUBUSWMasked512,
ssa.OpAMD64VPXORDMasked128,
ssa.OpAMD64VPXORDMasked256,
ssa.OpAMD64VPXORDMasked512,
ssa.OpAMD64VPABSQMasked128,
ssa.OpAMD64VPABSQMasked256,
ssa.OpAMD64VPABSQMasked512,
- ssa.OpAMD64VRCP14PSMasked128,
- ssa.OpAMD64VRCP14PSMasked256,
- ssa.OpAMD64VRCP14PSMasked512,
- ssa.OpAMD64VRCP14PDMasked128,
- ssa.OpAMD64VRCP14PDMasked256,
- ssa.OpAMD64VRCP14PDMasked512,
- ssa.OpAMD64VRSQRT14PSMasked128,
- ssa.OpAMD64VRSQRT14PSMasked256,
- ssa.OpAMD64VRSQRT14PSMasked512,
- ssa.OpAMD64VRSQRT14PDMasked128,
- ssa.OpAMD64VRSQRT14PDMasked256,
- ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
ssa.OpAMD64VPOPCNTQMasked128,
ssa.OpAMD64VPOPCNTQMasked256,
ssa.OpAMD64VPOPCNTQMasked512,
+ ssa.OpAMD64VRCP14PSMasked128,
+ ssa.OpAMD64VRCP14PSMasked256,
+ ssa.OpAMD64VRCP14PSMasked512,
+ ssa.OpAMD64VRCP14PDMasked128,
+ ssa.OpAMD64VRCP14PDMasked256,
+ ssa.OpAMD64VRCP14PDMasked512,
+ ssa.OpAMD64VRSQRT14PSMasked128,
+ ssa.OpAMD64VRSQRT14PSMasked256,
+ ssa.OpAMD64VRSQRT14PSMasked512,
+ ssa.OpAMD64VRSQRT14PDMasked128,
+ ssa.OpAMD64VRSQRT14PDMasked256,
+ ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VSQRTPSMasked128,
ssa.OpAMD64VSQRTPSMasked256,
ssa.OpAMD64VSQRTPSMasked512,
ssa.OpAMD64VPSRAQMasked512const:
p = simdVkvImm8(s, v)
- case ssa.OpAMD64VDPPS128,
- ssa.OpAMD64VDPPS256,
- ssa.OpAMD64VDPPD128,
- ssa.OpAMD64VCMPPS128,
+ case ssa.OpAMD64VCMPPS128,
ssa.OpAMD64VCMPPS256,
ssa.OpAMD64VCMPPD128,
ssa.OpAMD64VCMPPD256,
case ssa.OpAMD64VPDPWSSD128,
ssa.OpAMD64VPDPWSSD256,
ssa.OpAMD64VPDPWSSD512,
+ ssa.OpAMD64VPDPWSSDS128,
+ ssa.OpAMD64VPDPWSSDS256,
+ ssa.OpAMD64VPDPWSSDS512,
+ ssa.OpAMD64VPDPBUSD128,
+ ssa.OpAMD64VPDPBUSD256,
+ ssa.OpAMD64VPDPBUSD512,
+ ssa.OpAMD64VPDPBUSDS128,
+ ssa.OpAMD64VPDPBUSDS256,
+ ssa.OpAMD64VPDPBUSDS512,
ssa.OpAMD64VFMADD213PS128,
ssa.OpAMD64VFMADD213PS256,
ssa.OpAMD64VFMADD213PS512,
ssa.OpAMD64VPERMI2Q256,
ssa.OpAMD64VPERMI2PD512,
ssa.OpAMD64VPERMI2Q512,
- ssa.OpAMD64VPDPWSSDS128,
- ssa.OpAMD64VPDPWSSDS256,
- ssa.OpAMD64VPDPWSSDS512,
- ssa.OpAMD64VPDPBUSDS128,
- ssa.OpAMD64VPDPBUSDS256,
- ssa.OpAMD64VPDPBUSDS512,
ssa.OpAMD64VPSHLDVW128,
ssa.OpAMD64VPSHLDVW256,
ssa.OpAMD64VPSHLDVW512,
ssa.OpAMD64VPSHRDVD512,
ssa.OpAMD64VPSHRDVQ128,
ssa.OpAMD64VPSHRDVQ256,
- ssa.OpAMD64VPSHRDVQ512,
- ssa.OpAMD64VPDPBUSD128,
- ssa.OpAMD64VPDPBUSD256,
- ssa.OpAMD64VPDPBUSD512:
+ ssa.OpAMD64VPSHRDVQ512:
p = simdV31ResultInArg0(s, v)
case ssa.OpAMD64VPDPWSSDMasked128,
ssa.OpAMD64VPDPWSSDMasked256,
ssa.OpAMD64VPDPWSSDMasked512,
+ ssa.OpAMD64VPDPWSSDSMasked128,
+ ssa.OpAMD64VPDPWSSDSMasked256,
+ ssa.OpAMD64VPDPWSSDSMasked512,
+ ssa.OpAMD64VPDPBUSDMasked128,
+ ssa.OpAMD64VPDPBUSDMasked256,
+ ssa.OpAMD64VPDPBUSDMasked512,
+ ssa.OpAMD64VPDPBUSDSMasked128,
+ ssa.OpAMD64VPDPBUSDSMasked256,
+ ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VFMADD213PSMasked128,
ssa.OpAMD64VFMADD213PSMasked256,
ssa.OpAMD64VFMADD213PSMasked512,
ssa.OpAMD64VPERMI2QMasked256,
ssa.OpAMD64VPERMI2PDMasked512,
ssa.OpAMD64VPERMI2QMasked512,
- ssa.OpAMD64VPDPWSSDSMasked128,
- ssa.OpAMD64VPDPWSSDSMasked256,
- ssa.OpAMD64VPDPWSSDSMasked512,
- ssa.OpAMD64VPDPBUSDSMasked128,
- ssa.OpAMD64VPDPBUSDSMasked256,
- ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VPSHLDVWMasked128,
ssa.OpAMD64VPSHLDVWMasked256,
ssa.OpAMD64VPSHLDVWMasked512,
ssa.OpAMD64VPSHRDVDMasked512,
ssa.OpAMD64VPSHRDVQMasked128,
ssa.OpAMD64VPSHRDVQMasked256,
- ssa.OpAMD64VPSHRDVQMasked512,
- ssa.OpAMD64VPDPBUSDMasked128,
- ssa.OpAMD64VPDPBUSDMasked256,
- ssa.OpAMD64VPDPBUSDMasked512:
+ ssa.OpAMD64VPSHRDVQMasked512:
p = simdV3kvResultInArg0(s, v)
case ssa.OpAMD64VPSLLW128,
ssa.OpAMD64VPDPWSSDMasked128,
ssa.OpAMD64VPDPWSSDMasked256,
ssa.OpAMD64VPDPWSSDMasked512,
+ ssa.OpAMD64VPDPWSSDSMasked128,
+ ssa.OpAMD64VPDPWSSDSMasked256,
+ ssa.OpAMD64VPDPWSSDSMasked512,
+ ssa.OpAMD64VPDPBUSDMasked128,
+ ssa.OpAMD64VPDPBUSDMasked256,
+ ssa.OpAMD64VPDPBUSDMasked512,
+ ssa.OpAMD64VPDPBUSDSMasked128,
+ ssa.OpAMD64VPDPBUSDSMasked256,
+ ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VADDPSMasked128,
ssa.OpAMD64VADDPSMasked256,
ssa.OpAMD64VADDPSMasked512,
ssa.OpAMD64VPADDSWMasked128,
ssa.OpAMD64VPADDSWMasked256,
ssa.OpAMD64VPADDSWMasked512,
+ ssa.OpAMD64VPADDUSBMasked128,
+ ssa.OpAMD64VPADDUSBMasked256,
+ ssa.OpAMD64VPADDUSBMasked512,
+ ssa.OpAMD64VPADDUSWMasked128,
+ ssa.OpAMD64VPADDUSWMasked256,
+ ssa.OpAMD64VPADDUSWMasked512,
ssa.OpAMD64VPANDDMasked128,
ssa.OpAMD64VPANDDMasked256,
ssa.OpAMD64VPANDDMasked512,
ssa.OpAMD64VPANDNQMasked128,
ssa.OpAMD64VPANDNQMasked256,
ssa.OpAMD64VPANDNQMasked512,
- ssa.OpAMD64VRCP14PSMasked128,
- ssa.OpAMD64VRCP14PSMasked256,
- ssa.OpAMD64VRCP14PSMasked512,
- ssa.OpAMD64VRCP14PDMasked128,
- ssa.OpAMD64VRCP14PDMasked256,
- ssa.OpAMD64VRCP14PDMasked512,
- ssa.OpAMD64VRSQRT14PSMasked128,
- ssa.OpAMD64VRSQRT14PSMasked256,
- ssa.OpAMD64VRSQRT14PSMasked512,
- ssa.OpAMD64VRSQRT14PDMasked128,
- ssa.OpAMD64VRSQRT14PDMasked256,
- ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VPAVGBMasked128,
ssa.OpAMD64VPAVGBMasked256,
ssa.OpAMD64VPAVGBMasked512,
ssa.OpAMD64VDIVPDMasked128,
ssa.OpAMD64VDIVPDMasked256,
ssa.OpAMD64VDIVPDMasked512,
+ ssa.OpAMD64VPMADDWDMasked128,
+ ssa.OpAMD64VPMADDWDMasked256,
+ ssa.OpAMD64VPMADDWDMasked512,
+ ssa.OpAMD64VPMADDUBSWMasked128,
+ ssa.OpAMD64VPMADDUBSWMasked256,
+ ssa.OpAMD64VPMADDUBSWMasked512,
ssa.OpAMD64VEXPANDPSMasked128,
ssa.OpAMD64VEXPANDPSMasked256,
ssa.OpAMD64VEXPANDPSMasked512,
ssa.OpAMD64VPEXPANDQMasked128,
ssa.OpAMD64VPEXPANDQMasked256,
ssa.OpAMD64VPEXPANDQMasked512,
- ssa.OpAMD64VFMADD213PSMasked128,
- ssa.OpAMD64VFMADD213PSMasked256,
- ssa.OpAMD64VFMADD213PSMasked512,
- ssa.OpAMD64VFMADD213PDMasked128,
- ssa.OpAMD64VFMADD213PDMasked256,
- ssa.OpAMD64VFMADD213PDMasked512,
- ssa.OpAMD64VFMADDSUB213PSMasked128,
- ssa.OpAMD64VFMADDSUB213PSMasked256,
- ssa.OpAMD64VFMADDSUB213PSMasked512,
- ssa.OpAMD64VFMADDSUB213PDMasked128,
- ssa.OpAMD64VFMADDSUB213PDMasked256,
- ssa.OpAMD64VFMADDSUB213PDMasked512,
- ssa.OpAMD64VFMSUBADD213PSMasked128,
- ssa.OpAMD64VFMSUBADD213PSMasked256,
- ssa.OpAMD64VFMSUBADD213PSMasked512,
- ssa.OpAMD64VFMSUBADD213PDMasked128,
- ssa.OpAMD64VFMSUBADD213PDMasked256,
- ssa.OpAMD64VFMSUBADD213PDMasked512,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked128,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked256,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked512,
ssa.OpAMD64VPMINUQMasked128,
ssa.OpAMD64VPMINUQMasked256,
ssa.OpAMD64VPMINUQMasked512,
- ssa.OpAMD64VPMULDQMasked128,
- ssa.OpAMD64VPMULDQMasked256,
- ssa.OpAMD64VPMULDQMasked512,
- ssa.OpAMD64VPMULUDQMasked128,
- ssa.OpAMD64VPMULUDQMasked256,
- ssa.OpAMD64VPMULUDQMasked512,
ssa.OpAMD64VPMULHWMasked128,
ssa.OpAMD64VPMULHWMasked256,
ssa.OpAMD64VPMULHWMasked512,
+ ssa.OpAMD64VFMADD213PSMasked128,
+ ssa.OpAMD64VFMADD213PSMasked256,
+ ssa.OpAMD64VFMADD213PSMasked512,
+ ssa.OpAMD64VFMADD213PDMasked128,
+ ssa.OpAMD64VFMADD213PDMasked256,
+ ssa.OpAMD64VFMADD213PDMasked512,
+ ssa.OpAMD64VFMADDSUB213PSMasked128,
+ ssa.OpAMD64VFMADDSUB213PSMasked256,
+ ssa.OpAMD64VFMADDSUB213PSMasked512,
+ ssa.OpAMD64VFMADDSUB213PDMasked128,
+ ssa.OpAMD64VFMADDSUB213PDMasked256,
+ ssa.OpAMD64VFMADDSUB213PDMasked512,
ssa.OpAMD64VPMULHUWMasked128,
ssa.OpAMD64VPMULHUWMasked256,
ssa.OpAMD64VPMULHUWMasked512,
ssa.OpAMD64VMULPSMasked128,
ssa.OpAMD64VMULPSMasked256,
ssa.OpAMD64VPMULLQMasked128,
ssa.OpAMD64VPMULLQMasked256,
ssa.OpAMD64VPMULLQMasked512,
+ ssa.OpAMD64VFMSUBADD213PSMasked128,
+ ssa.OpAMD64VFMSUBADD213PSMasked256,
+ ssa.OpAMD64VFMSUBADD213PSMasked512,
+ ssa.OpAMD64VFMSUBADD213PDMasked128,
+ ssa.OpAMD64VFMSUBADD213PDMasked256,
+ ssa.OpAMD64VFMSUBADD213PDMasked512,
+ ssa.OpAMD64VPOPCNTBMasked128,
+ ssa.OpAMD64VPOPCNTBMasked256,
+ ssa.OpAMD64VPOPCNTBMasked512,
+ ssa.OpAMD64VPOPCNTWMasked128,
+ ssa.OpAMD64VPOPCNTWMasked256,
+ ssa.OpAMD64VPOPCNTWMasked512,
+ ssa.OpAMD64VPOPCNTDMasked128,
+ ssa.OpAMD64VPOPCNTDMasked256,
+ ssa.OpAMD64VPOPCNTDMasked512,
+ ssa.OpAMD64VPOPCNTQMasked128,
+ ssa.OpAMD64VPOPCNTQMasked256,
+ ssa.OpAMD64VPOPCNTQMasked512,
ssa.OpAMD64VPORDMasked128,
ssa.OpAMD64VPORDMasked256,
ssa.OpAMD64VPORDMasked512,
ssa.OpAMD64VPORQMasked128,
ssa.OpAMD64VPORQMasked256,
ssa.OpAMD64VPORQMasked512,
- ssa.OpAMD64VPMADDWDMasked128,
- ssa.OpAMD64VPMADDWDMasked256,
- ssa.OpAMD64VPMADDWDMasked512,
ssa.OpAMD64VPERMI2BMasked128,
ssa.OpAMD64VPERMI2BMasked256,
ssa.OpAMD64VPERMI2BMasked512,
ssa.OpAMD64VPERMQMasked256,
ssa.OpAMD64VPERMPDMasked512,
ssa.OpAMD64VPERMQMasked512,
- ssa.OpAMD64VPOPCNTBMasked128,
- ssa.OpAMD64VPOPCNTBMasked256,
- ssa.OpAMD64VPOPCNTBMasked512,
- ssa.OpAMD64VPOPCNTWMasked128,
- ssa.OpAMD64VPOPCNTWMasked256,
- ssa.OpAMD64VPOPCNTWMasked512,
- ssa.OpAMD64VPOPCNTDMasked128,
- ssa.OpAMD64VPOPCNTDMasked256,
- ssa.OpAMD64VPOPCNTDMasked512,
- ssa.OpAMD64VPOPCNTQMasked128,
- ssa.OpAMD64VPOPCNTQMasked256,
- ssa.OpAMD64VPOPCNTQMasked512,
+ ssa.OpAMD64VRCP14PSMasked128,
+ ssa.OpAMD64VRCP14PSMasked256,
+ ssa.OpAMD64VRCP14PSMasked512,
+ ssa.OpAMD64VRCP14PDMasked128,
+ ssa.OpAMD64VRCP14PDMasked256,
+ ssa.OpAMD64VRCP14PDMasked512,
+ ssa.OpAMD64VRSQRT14PSMasked128,
+ ssa.OpAMD64VRSQRT14PSMasked256,
+ ssa.OpAMD64VRSQRT14PSMasked512,
+ ssa.OpAMD64VRSQRT14PDMasked128,
+ ssa.OpAMD64VRSQRT14PDMasked256,
+ ssa.OpAMD64VRSQRT14PDMasked512,
ssa.OpAMD64VPROLDMasked128,
ssa.OpAMD64VPROLDMasked256,
ssa.OpAMD64VPROLDMasked512,
ssa.OpAMD64VPRORVQMasked128,
ssa.OpAMD64VPRORVQMasked256,
ssa.OpAMD64VPRORVQMasked512,
- ssa.OpAMD64VPDPWSSDSMasked128,
- ssa.OpAMD64VPDPWSSDSMasked256,
- ssa.OpAMD64VPDPWSSDSMasked512,
- ssa.OpAMD64VPMADDUBSWMasked128,
- ssa.OpAMD64VPMADDUBSWMasked256,
- ssa.OpAMD64VPMADDUBSWMasked512,
- ssa.OpAMD64VPDPBUSDSMasked128,
- ssa.OpAMD64VPDPBUSDSMasked256,
- ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VSCALEFPSMasked128,
ssa.OpAMD64VSCALEFPSMasked256,
ssa.OpAMD64VSCALEFPSMasked512,
ssa.OpAMD64VPSUBSWMasked128,
ssa.OpAMD64VPSUBSWMasked256,
ssa.OpAMD64VPSUBSWMasked512,
- ssa.OpAMD64VPDPBUSDMasked128,
- ssa.OpAMD64VPDPBUSDMasked256,
- ssa.OpAMD64VPDPBUSDMasked512,
+ ssa.OpAMD64VPSUBUSBMasked128,
+ ssa.OpAMD64VPSUBUSBMasked256,
+ ssa.OpAMD64VPSUBUSBMasked512,
+ ssa.OpAMD64VPSUBUSWMasked128,
+ ssa.OpAMD64VPSUBUSWMasked256,
+ ssa.OpAMD64VPSUBUSWMasked512,
ssa.OpAMD64VPXORDMasked128,
ssa.OpAMD64VPXORDMasked256,
ssa.OpAMD64VPXORDMasked512,
// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
-(AbsoluteInt8x16 ...) => (VPABSB128 ...)
-(AbsoluteInt8x32 ...) => (VPABSB256 ...)
-(AbsoluteInt8x64 ...) => (VPABSB512 ...)
-(AbsoluteInt16x8 ...) => (VPABSW128 ...)
-(AbsoluteInt16x16 ...) => (VPABSW256 ...)
-(AbsoluteInt16x32 ...) => (VPABSW512 ...)
-(AbsoluteInt32x4 ...) => (VPABSD128 ...)
-(AbsoluteInt32x8 ...) => (VPABSD256 ...)
-(AbsoluteInt32x16 ...) => (VPABSD512 ...)
-(AbsoluteInt64x2 ...) => (VPABSQ128 ...)
-(AbsoluteInt64x4 ...) => (VPABSQ256 ...)
-(AbsoluteInt64x8 ...) => (VPABSQ512 ...)
-(AbsoluteMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AbsoluteMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(AbsInt8x16 ...) => (VPABSB128 ...)
+(AbsInt8x32 ...) => (VPABSB256 ...)
+(AbsInt8x64 ...) => (VPABSB512 ...)
+(AbsInt16x8 ...) => (VPABSW128 ...)
+(AbsInt16x16 ...) => (VPABSW256 ...)
+(AbsInt16x32 ...) => (VPABSW512 ...)
+(AbsInt32x4 ...) => (VPABSD128 ...)
+(AbsInt32x8 ...) => (VPABSD256 ...)
+(AbsInt32x16 ...) => (VPABSD512 ...)
+(AbsInt64x2 ...) => (VPABSQ128 ...)
+(AbsInt64x4 ...) => (VPABSQ256 ...)
+(AbsInt64x8 ...) => (VPABSQ512 ...)
+(AbsMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(AbsMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(AbsMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(AbsMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(AbsMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(AbsMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(AbsMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(AbsMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(AbsMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(AbsMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(AbsMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(AbsMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(AddFloat32x4 ...) => (VADDPS128 ...)
(AddFloat32x8 ...) => (VADDPS256 ...)
(AddFloat32x16 ...) => (VADDPS512 ...)
(AddUint64x2 ...) => (VPADDQ128 ...)
(AddUint64x4 ...) => (VPADDQ256 ...)
(AddUint64x8 ...) => (VPADDQ512 ...)
-(AddDotProdInt32x4 ...) => (VPDPWSSD128 ...)
-(AddDotProdInt32x8 ...) => (VPDPWSSD256 ...)
-(AddDotProdInt32x16 ...) => (VPDPWSSD512 ...)
-(AddDotProdMaskedInt32x4 x y z mask) => (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddDotProdMaskedInt32x8 x y z mask) => (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddDotProdMaskedInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(AddDotProdPairsSaturatedInt32x4 ...) => (VPDPWSSDS128 ...)
+(AddDotProdPairsSaturatedInt32x8 ...) => (VPDPWSSDS256 ...)
+(AddDotProdPairsSaturatedInt32x16 ...) => (VPDPWSSDS512 ...)
+(AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(AddDotProdQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
+(AddDotProdQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
+(AddDotProdQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
+(AddDotProdQuadrupleMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(AddDotProdQuadrupleMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(AddDotProdQuadrupleMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(AddDotProdQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
+(AddDotProdQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
+(AddDotProdQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
+(AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(AddSaturatedInt16x8 ...) => (VPADDSW128 ...)
(AddSaturatedInt16x16 ...) => (VPADDSW256 ...)
(AddSaturatedInt16x32 ...) => (VPADDSW512 ...)
-(AddSaturatedUint8x16 ...) => (VPADDSB128 ...)
-(AddSaturatedUint8x32 ...) => (VPADDSB256 ...)
-(AddSaturatedUint8x64 ...) => (VPADDSB512 ...)
-(AddSaturatedUint16x8 ...) => (VPADDSW128 ...)
-(AddSaturatedUint16x16 ...) => (VPADDSW256 ...)
-(AddSaturatedUint16x32 ...) => (VPADDSW512 ...)
+(AddSaturatedUint8x16 ...) => (VPADDUSB128 ...)
+(AddSaturatedUint8x32 ...) => (VPADDUSB256 ...)
+(AddSaturatedUint8x64 ...) => (VPADDUSB512 ...)
+(AddSaturatedUint16x8 ...) => (VPADDUSW128 ...)
+(AddSaturatedUint16x16 ...) => (VPADDUSW256 ...)
+(AddSaturatedUint16x32 ...) => (VPADDUSW512 ...)
(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint8x16 x y mask) => (VPADDUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint8x32 x y mask) => (VPADDUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint8x64 x y mask) => (VPADDUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint16x8 x y mask) => (VPADDUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint16x16 x y mask) => (VPADDUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(AddSaturatedMaskedUint16x32 x y mask) => (VPADDUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(AddSubFloat32x4 ...) => (VADDSUBPS128 ...)
(AddSubFloat32x8 ...) => (VADDSUBPS256 ...)
(AddSubFloat64x2 ...) => (VADDSUBPD128 ...)
(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ApproximateReciprocalFloat32x4 ...) => (VRCPPS128 ...)
-(ApproximateReciprocalFloat32x8 ...) => (VRCPPS256 ...)
-(ApproximateReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
-(ApproximateReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
-(ApproximateReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
-(ApproximateReciprocalFloat64x8 ...) => (VRCP14PD512 ...)
-(ApproximateReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ApproximateReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ApproximateReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ApproximateReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ApproximateReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ApproximateReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ApproximateReciprocalOfSqrtFloat32x4 ...) => (VRSQRTPS128 ...)
-(ApproximateReciprocalOfSqrtFloat32x8 ...) => (VRSQRTPS256 ...)
-(ApproximateReciprocalOfSqrtFloat32x16 ...) => (VRSQRT14PS512 ...)
-(ApproximateReciprocalOfSqrtFloat64x2 ...) => (VRSQRT14PD128 ...)
-(ApproximateReciprocalOfSqrtFloat64x4 ...) => (VRSQRT14PD256 ...)
-(ApproximateReciprocalOfSqrtFloat64x8 ...) => (VRSQRT14PD512 ...)
-(ApproximateReciprocalOfSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ApproximateReciprocalOfSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ApproximateReciprocalOfSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ApproximateReciprocalOfSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ApproximateReciprocalOfSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ApproximateReciprocalOfSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(AverageUint8x16 ...) => (VPAVGB128 ...)
(AverageUint8x32 ...) => (VPAVGB256 ...)
(AverageUint8x64 ...) => (VPAVGB512 ...)
(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(CopySignInt8x16 ...) => (VPSIGNB128 ...)
+(CopySignInt8x32 ...) => (VPSIGNB256 ...)
+(CopySignInt16x8 ...) => (VPSIGNW128 ...)
+(CopySignInt16x16 ...) => (VPSIGNW256 ...)
+(CopySignInt32x4 ...) => (VPSIGND128 ...)
+(CopySignInt32x8 ...) => (VPSIGND256 ...)
(DivFloat32x4 ...) => (VDIVPS128 ...)
(DivFloat32x8 ...) => (VDIVPS256 ...)
(DivFloat32x16 ...) => (VDIVPS512 ...)
(DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(DotProdBroadcastFloat32x4 x y) => (VDPPS128 [127] x y)
-(DotProdBroadcastFloat32x8 x y) => (VDPPS256 [127] x y)
-(DotProdBroadcastFloat64x2 x y) => (VDPPD128 [127] x y)
+(DotProdPairsInt16x8 ...) => (VPMADDWD128 ...)
+(DotProdPairsInt16x16 ...) => (VPMADDWD256 ...)
+(DotProdPairsInt16x32 ...) => (VPMADDWD512 ...)
+(DotProdPairsMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(DotProdPairsMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(DotProdPairsMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(DotProdPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
+(DotProdPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
+(DotProdPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
+(DotProdPairsSaturatedMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(DotProdPairsSaturatedMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(DotProdPairsSaturatedMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
(EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(FusedMultiplyAddFloat32x4 ...) => (VFMADD213PS128 ...)
-(FusedMultiplyAddFloat32x8 ...) => (VFMADD213PS256 ...)
-(FusedMultiplyAddFloat32x16 ...) => (VFMADD213PS512 ...)
-(FusedMultiplyAddFloat64x2 ...) => (VFMADD213PD128 ...)
-(FusedMultiplyAddFloat64x4 ...) => (VFMADD213PD256 ...)
-(FusedMultiplyAddFloat64x8 ...) => (VFMADD213PD512 ...)
-(FusedMultiplyAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FusedMultiplyAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FusedMultiplyAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FusedMultiplyAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FusedMultiplyAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FusedMultiplyAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(FusedMultiplyAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...)
-(FusedMultiplyAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...)
-(FusedMultiplyAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...)
-(FusedMultiplyAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...)
-(FusedMultiplyAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...)
-(FusedMultiplyAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...)
-(FusedMultiplyAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FusedMultiplyAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FusedMultiplyAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FusedMultiplyAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FusedMultiplyAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FusedMultiplyAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(FusedMultiplySubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...)
-(FusedMultiplySubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...)
-(FusedMultiplySubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...)
-(FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
-(FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
-(FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
-(FusedMultiplySubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FusedMultiplySubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FusedMultiplySubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FusedMultiplySubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FusedMultiplySubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FusedMultiplySubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(GaloisFieldAffineTransformUint8x16 ...) => (VGF2P8AFFINEQB128 ...)
(GaloisFieldAffineTransformUint8x32 ...) => (VGF2P8AFFINEQB256 ...)
(GaloisFieldAffineTransformUint8x64 ...) => (VGF2P8AFFINEQB512 ...)
(MulInt64x2 ...) => (VPMULLQ128 ...)
(MulInt64x4 ...) => (VPMULLQ256 ...)
(MulInt64x8 ...) => (VPMULLQ512 ...)
+(MulUint16x8 ...) => (VPMULLW128 ...)
+(MulUint16x16 ...) => (VPMULLW256 ...)
+(MulUint16x32 ...) => (VPMULLW512 ...)
+(MulUint32x4 ...) => (VPMULLD128 ...)
+(MulUint32x8 ...) => (VPMULLD256 ...)
+(MulUint32x16 ...) => (VPMULLD512 ...)
+(MulUint64x2 ...) => (VPMULLQ128 ...)
+(MulUint64x4 ...) => (VPMULLQ256 ...)
+(MulUint64x8 ...) => (VPMULLQ512 ...)
+(MulAddFloat32x4 ...) => (VFMADD213PS128 ...)
+(MulAddFloat32x8 ...) => (VFMADD213PS256 ...)
+(MulAddFloat32x16 ...) => (VFMADD213PS512 ...)
+(MulAddFloat64x2 ...) => (VFMADD213PD128 ...)
+(MulAddFloat64x4 ...) => (VFMADD213PD256 ...)
+(MulAddFloat64x8 ...) => (VFMADD213PD512 ...)
+(MulAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MulAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MulAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MulAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MulAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MulAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MulAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...)
+(MulAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...)
+(MulAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...)
+(MulAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...)
+(MulAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...)
+(MulAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...)
+(MulAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MulAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MulAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MulAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MulAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MulAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...)
(MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...)
-(MulEvenWidenInt64x2 ...) => (VPMULDQ128 ...)
-(MulEvenWidenInt64x4 ...) => (VPMULDQ256 ...)
-(MulEvenWidenInt64x8 ...) => (VPMULDQ512 ...)
(MulEvenWidenUint32x4 ...) => (VPMULUDQ128 ...)
(MulEvenWidenUint32x8 ...) => (VPMULUDQ256 ...)
-(MulEvenWidenUint64x2 ...) => (VPMULUDQ128 ...)
-(MulEvenWidenUint64x4 ...) => (VPMULUDQ256 ...)
-(MulEvenWidenUint64x8 ...) => (VPMULUDQ512 ...)
-(MulEvenWidenMaskedInt64x2 x y mask) => (VPMULDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulEvenWidenMaskedInt64x4 x y mask) => (VPMULDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulEvenWidenMaskedInt64x8 x y mask) => (VPMULDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MulEvenWidenMaskedUint64x2 x y mask) => (VPMULUDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulEvenWidenMaskedUint64x4 x y mask) => (VPMULUDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulEvenWidenMaskedUint64x8 x y mask) => (VPMULUDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(MulHighInt16x8 ...) => (VPMULHW128 ...)
(MulHighInt16x16 ...) => (VPMULHW256 ...)
(MulHighInt16x32 ...) => (VPMULHW512 ...)
-(MulHighUint16x8 ...) => (VPMULHUW128 ...)
-(MulHighUint16x16 ...) => (VPMULHUW256 ...)
-(MulHighUint16x32 ...) => (VPMULHUW512 ...)
(MulHighMaskedInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(MulHighMaskedInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MulMaskedUint16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(MulMaskedUint16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(MulMaskedUint16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(MulMaskedUint32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MulMaskedUint32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MulMaskedUint32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MulMaskedUint64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MulMaskedUint64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MulMaskedUint64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MulSubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...)
+(MulSubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...)
+(MulSubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...)
+(MulSubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
+(MulSubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
+(MulSubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
+(MulSubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MulSubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MulSubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MulSubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MulSubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MulSubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
(NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y)
(NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y)
(NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
(NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
(NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
(NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
+(OnesCountInt8x16 ...) => (VPOPCNTB128 ...)
+(OnesCountInt8x32 ...) => (VPOPCNTB256 ...)
+(OnesCountInt8x64 ...) => (VPOPCNTB512 ...)
+(OnesCountInt16x8 ...) => (VPOPCNTW128 ...)
+(OnesCountInt16x16 ...) => (VPOPCNTW256 ...)
+(OnesCountInt16x32 ...) => (VPOPCNTW512 ...)
+(OnesCountInt32x4 ...) => (VPOPCNTD128 ...)
+(OnesCountInt32x8 ...) => (VPOPCNTD256 ...)
+(OnesCountInt32x16 ...) => (VPOPCNTD512 ...)
+(OnesCountInt64x2 ...) => (VPOPCNTQ128 ...)
+(OnesCountInt64x4 ...) => (VPOPCNTQ256 ...)
+(OnesCountInt64x8 ...) => (VPOPCNTQ512 ...)
+(OnesCountUint8x16 ...) => (VPOPCNTB128 ...)
+(OnesCountUint8x32 ...) => (VPOPCNTB256 ...)
+(OnesCountUint8x64 ...) => (VPOPCNTB512 ...)
+(OnesCountUint16x8 ...) => (VPOPCNTW128 ...)
+(OnesCountUint16x16 ...) => (VPOPCNTW256 ...)
+(OnesCountUint16x32 ...) => (VPOPCNTW512 ...)
+(OnesCountUint32x4 ...) => (VPOPCNTD128 ...)
+(OnesCountUint32x8 ...) => (VPOPCNTD256 ...)
+(OnesCountUint32x16 ...) => (VPOPCNTD512 ...)
+(OnesCountUint64x2 ...) => (VPOPCNTQ128 ...)
+(OnesCountUint64x4 ...) => (VPOPCNTQ256 ...)
+(OnesCountUint64x8 ...) => (VPOPCNTQ512 ...)
+(OnesCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(OnesCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(OnesCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(OnesCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(OnesCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(OnesCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(OnesCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(OnesCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(OnesCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(OnesCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(OnesCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(OnesCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(OnesCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(OnesCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(OnesCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(OnesCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(OnesCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(OnesCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(OnesCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(OnesCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(OnesCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(OnesCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(OnesCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(OnesCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(OrInt8x16 ...) => (VPOR128 ...)
(OrInt8x32 ...) => (VPOR256 ...)
(OrInt8x64 ...) => (VPORD512 ...)
(OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PairDotProdInt16x8 ...) => (VPMADDWD128 ...)
-(PairDotProdInt16x16 ...) => (VPMADDWD256 ...)
-(PairDotProdInt16x32 ...) => (VPMADDWD512 ...)
-(PairDotProdMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(PairDotProdMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(PairDotProdMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(PermuteFloat32x8 ...) => (VPERMPS256 ...)
(PermuteFloat32x16 ...) => (VPERMPS512 ...)
(PermuteFloat64x4 ...) => (VPERMPD256 ...)
(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PopCountInt8x16 ...) => (VPOPCNTB128 ...)
-(PopCountInt8x32 ...) => (VPOPCNTB256 ...)
-(PopCountInt8x64 ...) => (VPOPCNTB512 ...)
-(PopCountInt16x8 ...) => (VPOPCNTW128 ...)
-(PopCountInt16x16 ...) => (VPOPCNTW256 ...)
-(PopCountInt16x32 ...) => (VPOPCNTW512 ...)
-(PopCountInt32x4 ...) => (VPOPCNTD128 ...)
-(PopCountInt32x8 ...) => (VPOPCNTD256 ...)
-(PopCountInt32x16 ...) => (VPOPCNTD512 ...)
-(PopCountInt64x2 ...) => (VPOPCNTQ128 ...)
-(PopCountInt64x4 ...) => (VPOPCNTQ256 ...)
-(PopCountInt64x8 ...) => (VPOPCNTQ512 ...)
-(PopCountUint8x16 ...) => (VPOPCNTB128 ...)
-(PopCountUint8x32 ...) => (VPOPCNTB256 ...)
-(PopCountUint8x64 ...) => (VPOPCNTB512 ...)
-(PopCountUint16x8 ...) => (VPOPCNTW128 ...)
-(PopCountUint16x16 ...) => (VPOPCNTW256 ...)
-(PopCountUint16x32 ...) => (VPOPCNTW512 ...)
-(PopCountUint32x4 ...) => (VPOPCNTD128 ...)
-(PopCountUint32x8 ...) => (VPOPCNTD256 ...)
-(PopCountUint32x16 ...) => (VPOPCNTD512 ...)
-(PopCountUint64x2 ...) => (VPOPCNTQ128 ...)
-(PopCountUint64x4 ...) => (VPOPCNTQ256 ...)
-(PopCountUint64x8 ...) => (VPOPCNTQ512 ...)
-(PopCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(PopCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(PopCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(PopCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(PopCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(PopCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(PopCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(PopCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PopCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PopCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(PopCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PopCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PopCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(PopCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(PopCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(PopCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(PopCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(PopCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(PopCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(PopCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PopCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PopCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(PopCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PopCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ReciprocalFloat32x4 ...) => (VRCPPS128 ...)
+(ReciprocalFloat32x8 ...) => (VRCPPS256 ...)
+(ReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
+(ReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
+(ReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
+(ReciprocalFloat64x8 ...) => (VRCP14PD512 ...)
+(ReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ReciprocalSqrtFloat32x4 ...) => (VRSQRTPS128 ...)
+(ReciprocalSqrtFloat32x8 ...) => (VRSQRTPS256 ...)
+(ReciprocalSqrtFloat32x16 ...) => (VRSQRT14PS512 ...)
+(ReciprocalSqrtFloat64x2 ...) => (VRSQRT14PD128 ...)
+(ReciprocalSqrtFloat64x4 ...) => (VRSQRT14PD256 ...)
+(ReciprocalSqrtFloat64x8 ...) => (VRSQRT14PD512 ...)
+(ReciprocalSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ReciprocalSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ReciprocalSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ReciprocalSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ReciprocalSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ReciprocalSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
(RotateAllLeftInt32x4 ...) => (VPROLD128 ...)
(RotateAllLeftInt32x8 ...) => (VPROLD256 ...)
(RotateAllLeftInt32x16 ...) => (VPROLD512 ...)
(RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RoundFloat32x4 x) => (VROUNDPS128 [0] x)
-(RoundFloat32x8 x) => (VROUNDPS256 [0] x)
-(RoundFloat64x2 x) => (VROUNDPD128 [0] x)
-(RoundFloat64x4 x) => (VROUNDPD256 [0] x)
-(RoundScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
-(RoundScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
-(RoundScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
-(RoundScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
-(RoundScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
-(RoundScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
-(RoundScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RoundScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RoundScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RoundScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RoundScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RoundScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RoundScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
-(RoundScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
-(RoundScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
-(RoundScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
-(RoundScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
-(RoundScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
-(RoundScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RoundScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RoundScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RoundScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RoundScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RoundScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(SaturatedAddDotProdInt32x4 ...) => (VPDPWSSDS128 ...)
-(SaturatedAddDotProdInt32x8 ...) => (VPDPWSSDS256 ...)
-(SaturatedAddDotProdInt32x16 ...) => (VPDPWSSDS512 ...)
-(SaturatedAddDotProdMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SaturatedAddDotProdMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SaturatedAddDotProdMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...)
-(SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...)
-(SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...)
-(SaturatedUnsignedSignedPairDotProdMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SaturatedUnsignedSignedPairDotProdMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SaturatedUnsignedSignedPairDotProdMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...)
-(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...)
-(SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...)
-(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+(RoundToEvenFloat32x4 x) => (VROUNDPS128 [0] x)
+(RoundToEvenFloat32x8 x) => (VROUNDPS256 [0] x)
+(RoundToEvenFloat64x2 x) => (VROUNDPD128 [0] x)
+(RoundToEvenFloat64x4 x) => (VROUNDPD256 [0] x)
+(RoundToEvenScaledFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
+(RoundToEvenScaledFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
+(RoundToEvenScaledFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
+(RoundToEvenScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
+(RoundToEvenScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
+(RoundToEvenScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
+(RoundToEvenScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(RoundToEvenScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(RoundToEvenScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(RoundToEvenScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(RoundToEvenScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(RoundToEvenScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(RoundToEvenScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
+(RoundToEvenScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
+(RoundToEvenScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
+(RoundToEvenScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
+(RoundToEvenScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
+(RoundToEvenScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
+(RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
(ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
(ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
(ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
(ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(SignInt8x16 ...) => (VPSIGNB128 ...)
-(SignInt8x32 ...) => (VPSIGNB256 ...)
-(SignInt16x8 ...) => (VPSIGNW128 ...)
-(SignInt16x16 ...) => (VPSIGNW256 ...)
-(SignInt32x4 ...) => (VPSIGND128 ...)
-(SignInt32x8 ...) => (VPSIGND256 ...)
(SqrtFloat32x4 ...) => (VSQRTPS128 ...)
(SqrtFloat32x8 ...) => (VSQRTPS256 ...)
(SqrtFloat32x16 ...) => (VSQRTPS512 ...)
(SubSaturatedInt16x8 ...) => (VPSUBSW128 ...)
(SubSaturatedInt16x16 ...) => (VPSUBSW256 ...)
(SubSaturatedInt16x32 ...) => (VPSUBSW512 ...)
-(SubSaturatedUint8x16 ...) => (VPSUBSB128 ...)
-(SubSaturatedUint8x32 ...) => (VPSUBSB256 ...)
-(SubSaturatedUint8x64 ...) => (VPSUBSB512 ...)
-(SubSaturatedUint16x8 ...) => (VPSUBSW128 ...)
-(SubSaturatedUint16x16 ...) => (VPSUBSW256 ...)
-(SubSaturatedUint16x32 ...) => (VPSUBSW512 ...)
+(SubSaturatedUint8x16 ...) => (VPSUBUSB128 ...)
+(SubSaturatedUint8x32 ...) => (VPSUBUSB256 ...)
+(SubSaturatedUint8x64 ...) => (VPSUBUSB512 ...)
+(SubSaturatedUint16x8 ...) => (VPSUBUSW128 ...)
+(SubSaturatedUint16x16 ...) => (VPSUBUSW256 ...)
+(SubSaturatedUint16x32 ...) => (VPSUBUSW512 ...)
(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(TruncFloat32x4 x) => (VROUNDPS128 [3] x)
(TruncFloat32x8 x) => (VROUNDPS256 [3] x)
(TruncFloat64x2 x) => (VROUNDPD128 [3] x)
(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...)
-(UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...)
-(UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...)
-(UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
(XorInt8x16 ...) => (VPXOR128 ...)
(XorInt8x32 ...) => (VPXOR256 ...)
(XorInt8x64 ...) => (VPXORD512 ...)
{name: "VPADDSWMasked128", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDSWMasked256", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPADDUSB128", argLength: 2, reg: v21, asm: "VPADDUSB", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPADDUSB256", argLength: 2, reg: v21, asm: "VPADDUSB", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPADDUSB512", argLength: 2, reg: w21, asm: "VPADDUSB", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPADDUSBMasked128", argLength: 3, reg: w2kw, asm: "VPADDUSB", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPADDUSBMasked256", argLength: 3, reg: w2kw, asm: "VPADDUSB", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPADDUSBMasked512", argLength: 3, reg: w2kw, asm: "VPADDUSB", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPADDUSW128", argLength: 2, reg: v21, asm: "VPADDUSW", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPADDUSW256", argLength: 2, reg: v21, asm: "VPADDUSW", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPADDUSW512", argLength: 2, reg: w21, asm: "VPADDUSW", commutative: true, typ: "Vec512", resultInArg0: false},
+ {name: "VPADDUSWMasked128", argLength: 3, reg: w2kw, asm: "VPADDUSW", commutative: true, typ: "Vec128", resultInArg0: false},
+ {name: "VPADDUSWMasked256", argLength: 3, reg: w2kw, asm: "VPADDUSW", commutative: true, typ: "Vec256", resultInArg0: false},
+ {name: "VPADDUSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDUSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDW128", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPADDW256", argLength: 2, reg: v21, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPADDW512", argLength: 2, reg: w21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULDQ128", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULDQ256", argLength: 2, reg: v21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPMULDQ512", argLength: 2, reg: w21, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false},
- {name: "VPMULDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VPMULDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPMULDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHUW256", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPMULHUW512", argLength: 2, reg: w21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULHUWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VPMULHUWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHUWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false},
- {name: "VPMULHW128", argLength: 2, reg: v21, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VPMULHW256", argLength: 2, reg: v21, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULHW512", argLength: 2, reg: w21, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
- {name: "VPMULHWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULHWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPMULHWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLD128", argLength: 2, reg: v21, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULLD256", argLength: 2, reg: v21, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULLD512", argLength: 2, reg: w21, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULLWMasked512", argLength: 3, reg: w2kw, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPMULUDQ512", argLength: 2, reg: w21, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
- {name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPMULUDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPOPCNTB128", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPOPCNTB256", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPOPCNTB512", argLength: 1, reg: w11, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSUBSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPSUBUSB128", argLength: 2, reg: v21, asm: "VPSUBUSB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPSUBUSB256", argLength: 2, reg: v21, asm: "VPSUBUSB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPSUBUSB512", argLength: 2, reg: w21, asm: "VPSUBUSB", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPSUBUSBMasked128", argLength: 3, reg: w2kw, asm: "VPSUBUSB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPSUBUSBMasked256", argLength: 3, reg: w2kw, asm: "VPSUBUSB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPSUBUSBMasked512", argLength: 3, reg: w2kw, asm: "VPSUBUSB", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPSUBUSW128", argLength: 2, reg: v21, asm: "VPSUBUSW", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPSUBUSW256", argLength: 2, reg: v21, asm: "VPSUBUSW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPSUBUSW512", argLength: 2, reg: w21, asm: "VPSUBUSW", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPSUBUSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBUSW", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPSUBUSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBUSW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPSUBUSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBUSW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSUBW128", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSUBW256", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSUBW512", argLength: 2, reg: w21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VREDUCEPDMasked128", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VREDUCEPDMasked256", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VREDUCEPDMasked512", argLength: 2, reg: wkw, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
- {name: "VDPPS128", argLength: 2, reg: v21, asm: "VDPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VDPPS256", argLength: 2, reg: v21, asm: "VDPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VDPPD128", argLength: 2, reg: v21, asm: "VDPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VCMPPS128", argLength: 2, reg: v21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VCMPPS256", argLength: 2, reg: v21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VCMPPS512", argLength: 2, reg: w2k, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
func simdGenericOps() []opData {
return []opData{
- {name: "AbsoluteInt8x16", argLength: 1, commutative: false},
- {name: "AbsoluteInt8x32", argLength: 1, commutative: false},
- {name: "AbsoluteInt8x64", argLength: 1, commutative: false},
- {name: "AbsoluteInt16x8", argLength: 1, commutative: false},
- {name: "AbsoluteInt16x16", argLength: 1, commutative: false},
- {name: "AbsoluteInt16x32", argLength: 1, commutative: false},
- {name: "AbsoluteInt32x4", argLength: 1, commutative: false},
- {name: "AbsoluteInt32x8", argLength: 1, commutative: false},
- {name: "AbsoluteInt32x16", argLength: 1, commutative: false},
- {name: "AbsoluteInt64x2", argLength: 1, commutative: false},
- {name: "AbsoluteInt64x4", argLength: 1, commutative: false},
- {name: "AbsoluteInt64x8", argLength: 1, commutative: false},
- {name: "AbsoluteMaskedInt8x16", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt8x32", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt8x64", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt16x8", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt16x16", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt16x32", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt32x4", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt32x8", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt32x16", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt64x2", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt64x4", argLength: 2, commutative: false},
- {name: "AbsoluteMaskedInt64x8", argLength: 2, commutative: false},
- {name: "AddDotProdInt32x4", argLength: 3, commutative: false},
- {name: "AddDotProdInt32x8", argLength: 3, commutative: false},
- {name: "AddDotProdInt32x16", argLength: 3, commutative: false},
- {name: "AddDotProdMaskedInt32x4", argLength: 4, commutative: false},
- {name: "AddDotProdMaskedInt32x8", argLength: 4, commutative: false},
- {name: "AddDotProdMaskedInt32x16", argLength: 4, commutative: false},
+ {name: "AbsInt8x16", argLength: 1, commutative: false},
+ {name: "AbsInt8x32", argLength: 1, commutative: false},
+ {name: "AbsInt8x64", argLength: 1, commutative: false},
+ {name: "AbsInt16x8", argLength: 1, commutative: false},
+ {name: "AbsInt16x16", argLength: 1, commutative: false},
+ {name: "AbsInt16x32", argLength: 1, commutative: false},
+ {name: "AbsInt32x4", argLength: 1, commutative: false},
+ {name: "AbsInt32x8", argLength: 1, commutative: false},
+ {name: "AbsInt32x16", argLength: 1, commutative: false},
+ {name: "AbsInt64x2", argLength: 1, commutative: false},
+ {name: "AbsInt64x4", argLength: 1, commutative: false},
+ {name: "AbsInt64x8", argLength: 1, commutative: false},
+ {name: "AbsMaskedInt8x16", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt8x32", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt8x64", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt16x8", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt16x16", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt16x32", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt32x4", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt32x8", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt32x16", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt64x2", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt64x4", argLength: 2, commutative: false},
+ {name: "AbsMaskedInt64x8", argLength: 2, commutative: false},
+ {name: "AddDotProdPairsSaturatedInt32x4", argLength: 3, commutative: false},
+ {name: "AddDotProdPairsSaturatedInt32x8", argLength: 3, commutative: false},
+ {name: "AddDotProdPairsSaturatedInt32x16", argLength: 3, commutative: false},
+ {name: "AddDotProdPairsSaturatedMaskedInt32x4", argLength: 4, commutative: false},
+ {name: "AddDotProdPairsSaturatedMaskedInt32x8", argLength: 4, commutative: false},
+ {name: "AddDotProdPairsSaturatedMaskedInt32x16", argLength: 4, commutative: false},
+ {name: "AddDotProdQuadrupleInt32x4", argLength: 3, commutative: false},
+ {name: "AddDotProdQuadrupleInt32x8", argLength: 3, commutative: false},
+ {name: "AddDotProdQuadrupleInt32x16", argLength: 3, commutative: false},
+ {name: "AddDotProdQuadrupleMaskedInt32x4", argLength: 4, commutative: false},
+ {name: "AddDotProdQuadrupleMaskedInt32x8", argLength: 4, commutative: false},
+ {name: "AddDotProdQuadrupleMaskedInt32x16", argLength: 4, commutative: false},
+ {name: "AddDotProdQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
+ {name: "AddDotProdQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
+ {name: "AddDotProdQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
+ {name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", argLength: 4, commutative: false},
+ {name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", argLength: 4, commutative: false},
+ {name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", argLength: 4, commutative: false},
{name: "AddFloat32x4", argLength: 2, commutative: true},
{name: "AddFloat32x8", argLength: 2, commutative: true},
{name: "AddFloat32x16", argLength: 2, commutative: true},
{name: "AndUint64x2", argLength: 2, commutative: true},
{name: "AndUint64x4", argLength: 2, commutative: true},
{name: "AndUint64x8", argLength: 2, commutative: true},
- {name: "ApproximateReciprocalFloat32x4", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalFloat32x8", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalFloat32x16", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalFloat64x2", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalFloat64x4", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalFloat64x8", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalMaskedFloat32x4", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalMaskedFloat32x8", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalMaskedFloat64x2", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalMaskedFloat64x4", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalOfSqrtFloat32x4", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalOfSqrtFloat32x8", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalOfSqrtFloat32x16", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalOfSqrtFloat64x4", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalOfSqrtFloat64x8", argLength: 1, commutative: false},
- {name: "ApproximateReciprocalOfSqrtMaskedFloat32x4", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalOfSqrtMaskedFloat32x8", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalOfSqrtMaskedFloat32x16", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalOfSqrtMaskedFloat64x2", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalOfSqrtMaskedFloat64x4", argLength: 2, commutative: false},
- {name: "ApproximateReciprocalOfSqrtMaskedFloat64x8", argLength: 2, commutative: false},
{name: "AverageMaskedUint8x16", argLength: 3, commutative: true},
{name: "AverageMaskedUint8x32", argLength: 3, commutative: true},
{name: "AverageMaskedUint8x64", argLength: 3, commutative: true},
{name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false},
{name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false},
{name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false},
+ {name: "CopySignInt8x16", argLength: 2, commutative: false},
+ {name: "CopySignInt8x32", argLength: 2, commutative: false},
+ {name: "CopySignInt16x8", argLength: 2, commutative: false},
+ {name: "CopySignInt16x16", argLength: 2, commutative: false},
+ {name: "CopySignInt32x4", argLength: 2, commutative: false},
+ {name: "CopySignInt32x8", argLength: 2, commutative: false},
{name: "DivFloat32x4", argLength: 2, commutative: false},
{name: "DivFloat32x8", argLength: 2, commutative: false},
{name: "DivFloat32x16", argLength: 2, commutative: false},
{name: "DivMaskedFloat64x2", argLength: 3, commutative: false},
{name: "DivMaskedFloat64x4", argLength: 3, commutative: false},
{name: "DivMaskedFloat64x8", argLength: 3, commutative: false},
- {name: "DotProdBroadcastFloat32x4", argLength: 2, commutative: true},
- {name: "DotProdBroadcastFloat32x8", argLength: 2, commutative: true},
- {name: "DotProdBroadcastFloat64x2", argLength: 2, commutative: true},
+ {name: "DotProdPairsInt16x8", argLength: 2, commutative: false},
+ {name: "DotProdPairsInt16x16", argLength: 2, commutative: false},
+ {name: "DotProdPairsInt16x32", argLength: 2, commutative: false},
+ {name: "DotProdPairsMaskedInt16x8", argLength: 3, commutative: false},
+ {name: "DotProdPairsMaskedInt16x16", argLength: 3, commutative: false},
+ {name: "DotProdPairsMaskedInt16x32", argLength: 3, commutative: false},
+ {name: "DotProdPairsSaturatedMaskedUint8x16", argLength: 3, commutative: false},
+ {name: "DotProdPairsSaturatedMaskedUint8x32", argLength: 3, commutative: false},
+ {name: "DotProdPairsSaturatedMaskedUint8x64", argLength: 3, commutative: false},
+ {name: "DotProdPairsSaturatedUint8x16", argLength: 2, commutative: false},
+ {name: "DotProdPairsSaturatedUint8x32", argLength: 2, commutative: false},
+ {name: "DotProdPairsSaturatedUint8x64", argLength: 2, commutative: false},
{name: "EqualFloat32x4", argLength: 2, commutative: true},
{name: "EqualFloat32x8", argLength: 2, commutative: true},
{name: "EqualFloat32x16", argLength: 2, commutative: true},
{name: "FloorFloat32x8", argLength: 1, commutative: false},
{name: "FloorFloat64x2", argLength: 1, commutative: false},
{name: "FloorFloat64x4", argLength: 1, commutative: false},
- {name: "FusedMultiplyAddFloat32x4", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddFloat32x8", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddFloat32x16", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddFloat64x2", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddFloat64x4", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddFloat64x8", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddMaskedFloat32x4", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddMaskedFloat32x8", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddMaskedFloat32x16", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddMaskedFloat64x2", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddMaskedFloat64x4", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddMaskedFloat64x8", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddSubFloat32x4", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddSubFloat32x8", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddSubFloat32x16", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddSubFloat64x2", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddSubFloat64x4", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddSubFloat64x8", argLength: 3, commutative: false},
- {name: "FusedMultiplyAddSubMaskedFloat32x4", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddSubMaskedFloat32x8", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddSubMaskedFloat32x16", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddSubMaskedFloat64x2", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddSubMaskedFloat64x4", argLength: 4, commutative: false},
- {name: "FusedMultiplyAddSubMaskedFloat64x8", argLength: 4, commutative: false},
- {name: "FusedMultiplySubAddFloat32x4", argLength: 3, commutative: false},
- {name: "FusedMultiplySubAddFloat32x8", argLength: 3, commutative: false},
- {name: "FusedMultiplySubAddFloat32x16", argLength: 3, commutative: false},
- {name: "FusedMultiplySubAddFloat64x2", argLength: 3, commutative: false},
- {name: "FusedMultiplySubAddFloat64x4", argLength: 3, commutative: false},
- {name: "FusedMultiplySubAddFloat64x8", argLength: 3, commutative: false},
- {name: "FusedMultiplySubAddMaskedFloat32x4", argLength: 4, commutative: false},
- {name: "FusedMultiplySubAddMaskedFloat32x8", argLength: 4, commutative: false},
- {name: "FusedMultiplySubAddMaskedFloat32x16", argLength: 4, commutative: false},
- {name: "FusedMultiplySubAddMaskedFloat64x2", argLength: 4, commutative: false},
- {name: "FusedMultiplySubAddMaskedFloat64x4", argLength: 4, commutative: false},
- {name: "FusedMultiplySubAddMaskedFloat64x8", argLength: 4, commutative: false},
{name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false},
{name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false},
{name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false},
{name: "MinUint64x2", argLength: 2, commutative: true},
{name: "MinUint64x4", argLength: 2, commutative: true},
{name: "MinUint64x8", argLength: 2, commutative: true},
+ {name: "MulAddFloat32x4", argLength: 3, commutative: false},
+ {name: "MulAddFloat32x8", argLength: 3, commutative: false},
+ {name: "MulAddFloat32x16", argLength: 3, commutative: false},
+ {name: "MulAddFloat64x2", argLength: 3, commutative: false},
+ {name: "MulAddFloat64x4", argLength: 3, commutative: false},
+ {name: "MulAddFloat64x8", argLength: 3, commutative: false},
+ {name: "MulAddMaskedFloat32x4", argLength: 4, commutative: false},
+ {name: "MulAddMaskedFloat32x8", argLength: 4, commutative: false},
+ {name: "MulAddMaskedFloat32x16", argLength: 4, commutative: false},
+ {name: "MulAddMaskedFloat64x2", argLength: 4, commutative: false},
+ {name: "MulAddMaskedFloat64x4", argLength: 4, commutative: false},
+ {name: "MulAddMaskedFloat64x8", argLength: 4, commutative: false},
+ {name: "MulAddSubFloat32x4", argLength: 3, commutative: false},
+ {name: "MulAddSubFloat32x8", argLength: 3, commutative: false},
+ {name: "MulAddSubFloat32x16", argLength: 3, commutative: false},
+ {name: "MulAddSubFloat64x2", argLength: 3, commutative: false},
+ {name: "MulAddSubFloat64x4", argLength: 3, commutative: false},
+ {name: "MulAddSubFloat64x8", argLength: 3, commutative: false},
+ {name: "MulAddSubMaskedFloat32x4", argLength: 4, commutative: false},
+ {name: "MulAddSubMaskedFloat32x8", argLength: 4, commutative: false},
+ {name: "MulAddSubMaskedFloat32x16", argLength: 4, commutative: false},
+ {name: "MulAddSubMaskedFloat64x2", argLength: 4, commutative: false},
+ {name: "MulAddSubMaskedFloat64x4", argLength: 4, commutative: false},
+ {name: "MulAddSubMaskedFloat64x8", argLength: 4, commutative: false},
{name: "MulEvenWidenInt32x4", argLength: 2, commutative: true},
{name: "MulEvenWidenInt32x8", argLength: 2, commutative: true},
- {name: "MulEvenWidenInt64x2", argLength: 2, commutative: true},
- {name: "MulEvenWidenInt64x4", argLength: 2, commutative: true},
- {name: "MulEvenWidenInt64x8", argLength: 2, commutative: true},
- {name: "MulEvenWidenMaskedInt64x2", argLength: 3, commutative: true},
- {name: "MulEvenWidenMaskedInt64x4", argLength: 3, commutative: true},
- {name: "MulEvenWidenMaskedInt64x8", argLength: 3, commutative: true},
- {name: "MulEvenWidenMaskedUint64x2", argLength: 3, commutative: true},
- {name: "MulEvenWidenMaskedUint64x4", argLength: 3, commutative: true},
- {name: "MulEvenWidenMaskedUint64x8", argLength: 3, commutative: true},
{name: "MulEvenWidenUint32x4", argLength: 2, commutative: true},
{name: "MulEvenWidenUint32x8", argLength: 2, commutative: true},
- {name: "MulEvenWidenUint64x2", argLength: 2, commutative: true},
- {name: "MulEvenWidenUint64x4", argLength: 2, commutative: true},
- {name: "MulEvenWidenUint64x8", argLength: 2, commutative: true},
{name: "MulFloat32x4", argLength: 2, commutative: true},
{name: "MulFloat32x8", argLength: 2, commutative: true},
{name: "MulFloat32x16", argLength: 2, commutative: true},
{name: "MulHighMaskedInt16x8", argLength: 3, commutative: true},
{name: "MulHighMaskedInt16x16", argLength: 3, commutative: true},
{name: "MulHighMaskedInt16x32", argLength: 3, commutative: true},
- {name: "MulHighMaskedUint16x8", argLength: 3, commutative: true},
- {name: "MulHighMaskedUint16x16", argLength: 3, commutative: true},
- {name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
- {name: "MulHighUint16x8", argLength: 2, commutative: true},
- {name: "MulHighUint16x16", argLength: 2, commutative: true},
- {name: "MulHighUint16x32", argLength: 2, commutative: true},
{name: "MulInt16x8", argLength: 2, commutative: true},
{name: "MulInt16x16", argLength: 2, commutative: true},
{name: "MulInt16x32", argLength: 2, commutative: true},
{name: "MulMaskedInt64x2", argLength: 3, commutative: true},
{name: "MulMaskedInt64x4", argLength: 3, commutative: true},
{name: "MulMaskedInt64x8", argLength: 3, commutative: true},
+ {name: "MulMaskedUint16x8", argLength: 3, commutative: true},
+ {name: "MulMaskedUint16x16", argLength: 3, commutative: true},
+ {name: "MulMaskedUint16x32", argLength: 3, commutative: true},
+ {name: "MulMaskedUint32x4", argLength: 3, commutative: true},
+ {name: "MulMaskedUint32x8", argLength: 3, commutative: true},
+ {name: "MulMaskedUint32x16", argLength: 3, commutative: true},
+ {name: "MulMaskedUint64x2", argLength: 3, commutative: true},
+ {name: "MulMaskedUint64x4", argLength: 3, commutative: true},
+ {name: "MulMaskedUint64x8", argLength: 3, commutative: true},
+ {name: "MulSubAddFloat32x4", argLength: 3, commutative: false},
+ {name: "MulSubAddFloat32x8", argLength: 3, commutative: false},
+ {name: "MulSubAddFloat32x16", argLength: 3, commutative: false},
+ {name: "MulSubAddFloat64x2", argLength: 3, commutative: false},
+ {name: "MulSubAddFloat64x4", argLength: 3, commutative: false},
+ {name: "MulSubAddFloat64x8", argLength: 3, commutative: false},
+ {name: "MulSubAddMaskedFloat32x4", argLength: 4, commutative: false},
+ {name: "MulSubAddMaskedFloat32x8", argLength: 4, commutative: false},
+ {name: "MulSubAddMaskedFloat32x16", argLength: 4, commutative: false},
+ {name: "MulSubAddMaskedFloat64x2", argLength: 4, commutative: false},
+ {name: "MulSubAddMaskedFloat64x4", argLength: 4, commutative: false},
+ {name: "MulSubAddMaskedFloat64x8", argLength: 4, commutative: false},
+ {name: "MulUint16x8", argLength: 2, commutative: true},
+ {name: "MulUint16x16", argLength: 2, commutative: true},
+ {name: "MulUint16x32", argLength: 2, commutative: true},
+ {name: "MulUint32x4", argLength: 2, commutative: true},
+ {name: "MulUint32x8", argLength: 2, commutative: true},
+ {name: "MulUint32x16", argLength: 2, commutative: true},
+ {name: "MulUint64x2", argLength: 2, commutative: true},
+ {name: "MulUint64x4", argLength: 2, commutative: true},
+ {name: "MulUint64x8", argLength: 2, commutative: true},
{name: "NotEqualFloat32x4", argLength: 2, commutative: true},
{name: "NotEqualFloat32x8", argLength: 2, commutative: true},
{name: "NotEqualFloat32x16", argLength: 2, commutative: true},
{name: "NotEqualUint64x2", argLength: 2, commutative: true},
{name: "NotEqualUint64x4", argLength: 2, commutative: true},
{name: "NotEqualUint64x8", argLength: 2, commutative: true},
+ {name: "OnesCountInt8x16", argLength: 1, commutative: false},
+ {name: "OnesCountInt8x32", argLength: 1, commutative: false},
+ {name: "OnesCountInt8x64", argLength: 1, commutative: false},
+ {name: "OnesCountInt16x8", argLength: 1, commutative: false},
+ {name: "OnesCountInt16x16", argLength: 1, commutative: false},
+ {name: "OnesCountInt16x32", argLength: 1, commutative: false},
+ {name: "OnesCountInt32x4", argLength: 1, commutative: false},
+ {name: "OnesCountInt32x8", argLength: 1, commutative: false},
+ {name: "OnesCountInt32x16", argLength: 1, commutative: false},
+ {name: "OnesCountInt64x2", argLength: 1, commutative: false},
+ {name: "OnesCountInt64x4", argLength: 1, commutative: false},
+ {name: "OnesCountInt64x8", argLength: 1, commutative: false},
+ {name: "OnesCountMaskedInt8x16", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt8x32", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt8x64", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt16x8", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt16x16", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt16x32", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt32x4", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt32x8", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt32x16", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt64x2", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt64x4", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedInt64x8", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint8x16", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint8x32", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint8x64", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint16x8", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint16x16", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint16x32", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint32x4", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint32x8", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint32x16", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint64x2", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint64x4", argLength: 2, commutative: false},
+ {name: "OnesCountMaskedUint64x8", argLength: 2, commutative: false},
+ {name: "OnesCountUint8x16", argLength: 1, commutative: false},
+ {name: "OnesCountUint8x32", argLength: 1, commutative: false},
+ {name: "OnesCountUint8x64", argLength: 1, commutative: false},
+ {name: "OnesCountUint16x8", argLength: 1, commutative: false},
+ {name: "OnesCountUint16x16", argLength: 1, commutative: false},
+ {name: "OnesCountUint16x32", argLength: 1, commutative: false},
+ {name: "OnesCountUint32x4", argLength: 1, commutative: false},
+ {name: "OnesCountUint32x8", argLength: 1, commutative: false},
+ {name: "OnesCountUint32x16", argLength: 1, commutative: false},
+ {name: "OnesCountUint64x2", argLength: 1, commutative: false},
+ {name: "OnesCountUint64x4", argLength: 1, commutative: false},
+ {name: "OnesCountUint64x8", argLength: 1, commutative: false},
{name: "OrInt8x16", argLength: 2, commutative: true},
{name: "OrInt8x32", argLength: 2, commutative: true},
{name: "OrInt8x64", argLength: 2, commutative: true},
{name: "OrUint64x2", argLength: 2, commutative: true},
{name: "OrUint64x4", argLength: 2, commutative: true},
{name: "OrUint64x8", argLength: 2, commutative: true},
- {name: "PairDotProdInt16x8", argLength: 2, commutative: false},
- {name: "PairDotProdInt16x16", argLength: 2, commutative: false},
- {name: "PairDotProdInt16x32", argLength: 2, commutative: false},
- {name: "PairDotProdMaskedInt16x8", argLength: 3, commutative: false},
- {name: "PairDotProdMaskedInt16x16", argLength: 3, commutative: false},
- {name: "PairDotProdMaskedInt16x32", argLength: 3, commutative: false},
{name: "Permute2Float32x4", argLength: 3, commutative: false},
{name: "Permute2Float32x8", argLength: 3, commutative: false},
{name: "Permute2Float32x16", argLength: 3, commutative: false},
{name: "PermuteUint32x16", argLength: 2, commutative: false},
{name: "PermuteUint64x4", argLength: 2, commutative: false},
{name: "PermuteUint64x8", argLength: 2, commutative: false},
- {name: "PopCountInt8x16", argLength: 1, commutative: false},
- {name: "PopCountInt8x32", argLength: 1, commutative: false},
- {name: "PopCountInt8x64", argLength: 1, commutative: false},
- {name: "PopCountInt16x8", argLength: 1, commutative: false},
- {name: "PopCountInt16x16", argLength: 1, commutative: false},
- {name: "PopCountInt16x32", argLength: 1, commutative: false},
- {name: "PopCountInt32x4", argLength: 1, commutative: false},
- {name: "PopCountInt32x8", argLength: 1, commutative: false},
- {name: "PopCountInt32x16", argLength: 1, commutative: false},
- {name: "PopCountInt64x2", argLength: 1, commutative: false},
- {name: "PopCountInt64x4", argLength: 1, commutative: false},
- {name: "PopCountInt64x8", argLength: 1, commutative: false},
- {name: "PopCountMaskedInt8x16", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt8x32", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt8x64", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt16x8", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt16x16", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt16x32", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt32x4", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt32x8", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt32x16", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt64x2", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt64x4", argLength: 2, commutative: false},
- {name: "PopCountMaskedInt64x8", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint8x16", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint8x64", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint16x32", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint32x8", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
- {name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
- {name: "PopCountUint8x16", argLength: 1, commutative: false},
- {name: "PopCountUint8x32", argLength: 1, commutative: false},
- {name: "PopCountUint8x64", argLength: 1, commutative: false},
- {name: "PopCountUint16x8", argLength: 1, commutative: false},
- {name: "PopCountUint16x16", argLength: 1, commutative: false},
- {name: "PopCountUint16x32", argLength: 1, commutative: false},
- {name: "PopCountUint32x4", argLength: 1, commutative: false},
- {name: "PopCountUint32x8", argLength: 1, commutative: false},
- {name: "PopCountUint32x16", argLength: 1, commutative: false},
- {name: "PopCountUint64x2", argLength: 1, commutative: false},
- {name: "PopCountUint64x4", argLength: 1, commutative: false},
- {name: "PopCountUint64x8", argLength: 1, commutative: false},
+ {name: "ReciprocalFloat32x4", argLength: 1, commutative: false},
+ {name: "ReciprocalFloat32x8", argLength: 1, commutative: false},
+ {name: "ReciprocalFloat32x16", argLength: 1, commutative: false},
+ {name: "ReciprocalFloat64x2", argLength: 1, commutative: false},
+ {name: "ReciprocalFloat64x4", argLength: 1, commutative: false},
+ {name: "ReciprocalFloat64x8", argLength: 1, commutative: false},
+ {name: "ReciprocalMaskedFloat32x4", argLength: 2, commutative: false},
+ {name: "ReciprocalMaskedFloat32x8", argLength: 2, commutative: false},
+ {name: "ReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
+ {name: "ReciprocalMaskedFloat64x2", argLength: 2, commutative: false},
+ {name: "ReciprocalMaskedFloat64x4", argLength: 2, commutative: false},
+ {name: "ReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
+ {name: "ReciprocalSqrtFloat32x4", argLength: 1, commutative: false},
+ {name: "ReciprocalSqrtFloat32x8", argLength: 1, commutative: false},
+ {name: "ReciprocalSqrtFloat32x16", argLength: 1, commutative: false},
+ {name: "ReciprocalSqrtFloat64x2", argLength: 1, commutative: false},
+ {name: "ReciprocalSqrtFloat64x4", argLength: 1, commutative: false},
+ {name: "ReciprocalSqrtFloat64x8", argLength: 1, commutative: false},
+ {name: "ReciprocalSqrtMaskedFloat32x4", argLength: 2, commutative: false},
+ {name: "ReciprocalSqrtMaskedFloat32x8", argLength: 2, commutative: false},
+ {name: "ReciprocalSqrtMaskedFloat32x16", argLength: 2, commutative: false},
+ {name: "ReciprocalSqrtMaskedFloat64x2", argLength: 2, commutative: false},
+ {name: "ReciprocalSqrtMaskedFloat64x4", argLength: 2, commutative: false},
+ {name: "ReciprocalSqrtMaskedFloat64x8", argLength: 2, commutative: false},
{name: "RotateLeftInt32x4", argLength: 2, commutative: false},
{name: "RotateLeftInt32x8", argLength: 2, commutative: false},
{name: "RotateLeftInt32x16", argLength: 2, commutative: false},
{name: "RotateRightUint64x2", argLength: 2, commutative: false},
{name: "RotateRightUint64x4", argLength: 2, commutative: false},
{name: "RotateRightUint64x8", argLength: 2, commutative: false},
- {name: "RoundFloat32x4", argLength: 1, commutative: false},
- {name: "RoundFloat32x8", argLength: 1, commutative: false},
- {name: "RoundFloat64x2", argLength: 1, commutative: false},
- {name: "RoundFloat64x4", argLength: 1, commutative: false},
- {name: "SaturatedAddDotProdInt32x4", argLength: 3, commutative: false},
- {name: "SaturatedAddDotProdInt32x8", argLength: 3, commutative: false},
- {name: "SaturatedAddDotProdInt32x16", argLength: 3, commutative: false},
- {name: "SaturatedAddDotProdMaskedInt32x4", argLength: 4, commutative: false},
- {name: "SaturatedAddDotProdMaskedInt32x8", argLength: 4, commutative: false},
- {name: "SaturatedAddDotProdMaskedInt32x16", argLength: 4, commutative: false},
- {name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16", argLength: 3, commutative: false},
- {name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32", argLength: 3, commutative: false},
- {name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64", argLength: 3, commutative: false},
- {name: "SaturatedUnsignedSignedPairDotProdUint8x16", argLength: 2, commutative: false},
- {name: "SaturatedUnsignedSignedPairDotProdUint8x32", argLength: 2, commutative: false},
- {name: "SaturatedUnsignedSignedPairDotProdUint8x64", argLength: 2, commutative: false},
- {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
- {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
- {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
- {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false},
- {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false},
- {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
+ {name: "RoundToEvenFloat32x4", argLength: 1, commutative: false},
+ {name: "RoundToEvenFloat32x8", argLength: 1, commutative: false},
+ {name: "RoundToEvenFloat64x2", argLength: 1, commutative: false},
+ {name: "RoundToEvenFloat64x4", argLength: 1, commutative: false},
{name: "ScaleFloat32x4", argLength: 2, commutative: false},
{name: "ScaleFloat32x8", argLength: 2, commutative: false},
{name: "ScaleFloat32x16", argLength: 2, commutative: false},
{name: "ShiftRightUint64x2", argLength: 2, commutative: false},
{name: "ShiftRightUint64x4", argLength: 2, commutative: false},
{name: "ShiftRightUint64x8", argLength: 2, commutative: false},
- {name: "SignInt8x16", argLength: 2, commutative: false},
- {name: "SignInt8x32", argLength: 2, commutative: false},
- {name: "SignInt16x8", argLength: 2, commutative: false},
- {name: "SignInt16x16", argLength: 2, commutative: false},
- {name: "SignInt32x4", argLength: 2, commutative: false},
- {name: "SignInt32x8", argLength: 2, commutative: false},
{name: "SqrtFloat32x4", argLength: 1, commutative: false},
{name: "SqrtFloat32x8", argLength: 1, commutative: false},
{name: "SqrtFloat32x16", argLength: 1, commutative: false},
{name: "TruncFloat32x8", argLength: 1, commutative: false},
{name: "TruncFloat64x2", argLength: 1, commutative: false},
{name: "TruncFloat64x4", argLength: 1, commutative: false},
- {name: "UnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false},
- {name: "UnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false},
- {name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false},
- {name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false},
- {name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false},
- {name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false},
{name: "XorInt8x16", argLength: 2, commutative: true},
{name: "XorInt8x32", argLength: 2, commutative: true},
{name: "XorInt8x64", argLength: 2, commutative: true},
{name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
- {name: "RoundScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
OpAMD64VPADDSWMasked128
OpAMD64VPADDSWMasked256
OpAMD64VPADDSWMasked512
+ OpAMD64VPADDUSB128
+ OpAMD64VPADDUSB256
+ OpAMD64VPADDUSB512
+ OpAMD64VPADDUSBMasked128
+ OpAMD64VPADDUSBMasked256
+ OpAMD64VPADDUSBMasked512
+ OpAMD64VPADDUSW128
+ OpAMD64VPADDUSW256
+ OpAMD64VPADDUSW512
+ OpAMD64VPADDUSWMasked128
+ OpAMD64VPADDUSWMasked256
+ OpAMD64VPADDUSWMasked512
OpAMD64VPADDW128
OpAMD64VPADDW256
OpAMD64VPADDW512
OpAMD64VPMINUWMasked512
OpAMD64VPMULDQ128
OpAMD64VPMULDQ256
- OpAMD64VPMULDQ512
- OpAMD64VPMULDQMasked128
- OpAMD64VPMULDQMasked256
- OpAMD64VPMULDQMasked512
OpAMD64VPMULHUW128
OpAMD64VPMULHUW256
- OpAMD64VPMULHUW512
OpAMD64VPMULHUWMasked128
- OpAMD64VPMULHUWMasked256
OpAMD64VPMULHUWMasked512
- OpAMD64VPMULHW128
- OpAMD64VPMULHW256
OpAMD64VPMULHW512
- OpAMD64VPMULHWMasked128
OpAMD64VPMULHWMasked256
- OpAMD64VPMULHWMasked512
OpAMD64VPMULLD128
OpAMD64VPMULLD256
OpAMD64VPMULLD512
OpAMD64VPMULLWMasked512
OpAMD64VPMULUDQ128
OpAMD64VPMULUDQ256
- OpAMD64VPMULUDQ512
- OpAMD64VPMULUDQMasked128
- OpAMD64VPMULUDQMasked256
- OpAMD64VPMULUDQMasked512
OpAMD64VPOPCNTB128
OpAMD64VPOPCNTB256
OpAMD64VPOPCNTB512
OpAMD64VPSUBSWMasked128
OpAMD64VPSUBSWMasked256
OpAMD64VPSUBSWMasked512
+ OpAMD64VPSUBUSB128
+ OpAMD64VPSUBUSB256
+ OpAMD64VPSUBUSB512
+ OpAMD64VPSUBUSBMasked128
+ OpAMD64VPSUBUSBMasked256
+ OpAMD64VPSUBUSBMasked512
+ OpAMD64VPSUBUSW128
+ OpAMD64VPSUBUSW256
+ OpAMD64VPSUBUSW512
+ OpAMD64VPSUBUSWMasked128
+ OpAMD64VPSUBUSWMasked256
+ OpAMD64VPSUBUSWMasked512
OpAMD64VPSUBW128
OpAMD64VPSUBW256
OpAMD64VPSUBW512
OpAMD64VREDUCEPDMasked128
OpAMD64VREDUCEPDMasked256
OpAMD64VREDUCEPDMasked512
- OpAMD64VDPPS128
- OpAMD64VDPPS256
- OpAMD64VDPPD128
OpAMD64VCMPPS128
OpAMD64VCMPPS256
OpAMD64VCMPPS512
OpCvtMask64x2to8
OpCvtMask64x4to8
OpCvtMask64x8to8
- OpAbsoluteInt8x16
- OpAbsoluteInt8x32
- OpAbsoluteInt8x64
- OpAbsoluteInt16x8
- OpAbsoluteInt16x16
- OpAbsoluteInt16x32
- OpAbsoluteInt32x4
- OpAbsoluteInt32x8
- OpAbsoluteInt32x16
- OpAbsoluteInt64x2
- OpAbsoluteInt64x4
- OpAbsoluteInt64x8
- OpAbsoluteMaskedInt8x16
- OpAbsoluteMaskedInt8x32
- OpAbsoluteMaskedInt8x64
- OpAbsoluteMaskedInt16x8
- OpAbsoluteMaskedInt16x16
- OpAbsoluteMaskedInt16x32
- OpAbsoluteMaskedInt32x4
- OpAbsoluteMaskedInt32x8
- OpAbsoluteMaskedInt32x16
- OpAbsoluteMaskedInt64x2
- OpAbsoluteMaskedInt64x4
- OpAbsoluteMaskedInt64x8
- OpAddDotProdInt32x4
- OpAddDotProdInt32x8
- OpAddDotProdInt32x16
- OpAddDotProdMaskedInt32x4
- OpAddDotProdMaskedInt32x8
- OpAddDotProdMaskedInt32x16
+ OpAbsInt8x16
+ OpAbsInt8x32
+ OpAbsInt8x64
+ OpAbsInt16x8
+ OpAbsInt16x16
+ OpAbsInt16x32
+ OpAbsInt32x4
+ OpAbsInt32x8
+ OpAbsInt32x16
+ OpAbsInt64x2
+ OpAbsInt64x4
+ OpAbsInt64x8
+ OpAbsMaskedInt8x16
+ OpAbsMaskedInt8x32
+ OpAbsMaskedInt8x64
+ OpAbsMaskedInt16x8
+ OpAbsMaskedInt16x16
+ OpAbsMaskedInt16x32
+ OpAbsMaskedInt32x4
+ OpAbsMaskedInt32x8
+ OpAbsMaskedInt32x16
+ OpAbsMaskedInt64x2
+ OpAbsMaskedInt64x4
+ OpAbsMaskedInt64x8
+ OpAddDotProdPairsSaturatedInt32x4
+ OpAddDotProdPairsSaturatedInt32x8
+ OpAddDotProdPairsSaturatedInt32x16
+ OpAddDotProdPairsSaturatedMaskedInt32x4
+ OpAddDotProdPairsSaturatedMaskedInt32x8
+ OpAddDotProdPairsSaturatedMaskedInt32x16
+ OpAddDotProdQuadrupleInt32x4
+ OpAddDotProdQuadrupleInt32x8
+ OpAddDotProdQuadrupleInt32x16
+ OpAddDotProdQuadrupleMaskedInt32x4
+ OpAddDotProdQuadrupleMaskedInt32x8
+ OpAddDotProdQuadrupleMaskedInt32x16
+ OpAddDotProdQuadrupleSaturatedInt32x4
+ OpAddDotProdQuadrupleSaturatedInt32x8
+ OpAddDotProdQuadrupleSaturatedInt32x16
+ OpAddDotProdQuadrupleSaturatedMaskedInt32x4
+ OpAddDotProdQuadrupleSaturatedMaskedInt32x8
+ OpAddDotProdQuadrupleSaturatedMaskedInt32x16
OpAddFloat32x4
OpAddFloat32x8
OpAddFloat32x16
OpAndUint64x2
OpAndUint64x4
OpAndUint64x8
- OpApproximateReciprocalFloat32x4
- OpApproximateReciprocalFloat32x8
- OpApproximateReciprocalFloat32x16
- OpApproximateReciprocalFloat64x2
- OpApproximateReciprocalFloat64x4
- OpApproximateReciprocalFloat64x8
- OpApproximateReciprocalMaskedFloat32x4
- OpApproximateReciprocalMaskedFloat32x8
- OpApproximateReciprocalMaskedFloat32x16
- OpApproximateReciprocalMaskedFloat64x2
- OpApproximateReciprocalMaskedFloat64x4
- OpApproximateReciprocalMaskedFloat64x8
- OpApproximateReciprocalOfSqrtFloat32x4
- OpApproximateReciprocalOfSqrtFloat32x8
- OpApproximateReciprocalOfSqrtFloat32x16
- OpApproximateReciprocalOfSqrtFloat64x2
- OpApproximateReciprocalOfSqrtFloat64x4
- OpApproximateReciprocalOfSqrtFloat64x8
- OpApproximateReciprocalOfSqrtMaskedFloat32x4
- OpApproximateReciprocalOfSqrtMaskedFloat32x8
- OpApproximateReciprocalOfSqrtMaskedFloat32x16
- OpApproximateReciprocalOfSqrtMaskedFloat64x2
- OpApproximateReciprocalOfSqrtMaskedFloat64x4
- OpApproximateReciprocalOfSqrtMaskedFloat64x8
OpAverageMaskedUint8x16
OpAverageMaskedUint8x32
OpAverageMaskedUint8x64
OpConvertToUint32MaskedFloat32x4
OpConvertToUint32MaskedFloat32x8
OpConvertToUint32MaskedFloat32x16
+ OpCopySignInt8x16
+ OpCopySignInt8x32
+ OpCopySignInt16x8
+ OpCopySignInt16x16
+ OpCopySignInt32x4
+ OpCopySignInt32x8
OpDivFloat32x4
OpDivFloat32x8
OpDivFloat32x16
OpDivMaskedFloat64x2
OpDivMaskedFloat64x4
OpDivMaskedFloat64x8
- OpDotProdBroadcastFloat32x4
- OpDotProdBroadcastFloat32x8
- OpDotProdBroadcastFloat64x2
+ OpDotProdPairsInt16x8
+ OpDotProdPairsInt16x16
+ OpDotProdPairsInt16x32
+ OpDotProdPairsMaskedInt16x8
+ OpDotProdPairsMaskedInt16x16
+ OpDotProdPairsMaskedInt16x32
+ OpDotProdPairsSaturatedMaskedUint8x16
+ OpDotProdPairsSaturatedMaskedUint8x32
+ OpDotProdPairsSaturatedMaskedUint8x64
+ OpDotProdPairsSaturatedUint8x16
+ OpDotProdPairsSaturatedUint8x32
+ OpDotProdPairsSaturatedUint8x64
OpEqualFloat32x4
OpEqualFloat32x8
OpEqualFloat32x16
OpFloorFloat32x8
OpFloorFloat64x2
OpFloorFloat64x4
- OpFusedMultiplyAddFloat32x4
- OpFusedMultiplyAddFloat32x8
- OpFusedMultiplyAddFloat32x16
- OpFusedMultiplyAddFloat64x2
- OpFusedMultiplyAddFloat64x4
- OpFusedMultiplyAddFloat64x8
- OpFusedMultiplyAddMaskedFloat32x4
- OpFusedMultiplyAddMaskedFloat32x8
- OpFusedMultiplyAddMaskedFloat32x16
- OpFusedMultiplyAddMaskedFloat64x2
- OpFusedMultiplyAddMaskedFloat64x4
- OpFusedMultiplyAddMaskedFloat64x8
- OpFusedMultiplyAddSubFloat32x4
- OpFusedMultiplyAddSubFloat32x8
- OpFusedMultiplyAddSubFloat32x16
- OpFusedMultiplyAddSubFloat64x2
- OpFusedMultiplyAddSubFloat64x4
- OpFusedMultiplyAddSubFloat64x8
- OpFusedMultiplyAddSubMaskedFloat32x4
- OpFusedMultiplyAddSubMaskedFloat32x8
- OpFusedMultiplyAddSubMaskedFloat32x16
- OpFusedMultiplyAddSubMaskedFloat64x2
- OpFusedMultiplyAddSubMaskedFloat64x4
- OpFusedMultiplyAddSubMaskedFloat64x8
- OpFusedMultiplySubAddFloat32x4
- OpFusedMultiplySubAddFloat32x8
- OpFusedMultiplySubAddFloat32x16
- OpFusedMultiplySubAddFloat64x2
- OpFusedMultiplySubAddFloat64x4
- OpFusedMultiplySubAddFloat64x8
- OpFusedMultiplySubAddMaskedFloat32x4
- OpFusedMultiplySubAddMaskedFloat32x8
- OpFusedMultiplySubAddMaskedFloat32x16
- OpFusedMultiplySubAddMaskedFloat64x2
- OpFusedMultiplySubAddMaskedFloat64x4
- OpFusedMultiplySubAddMaskedFloat64x8
OpGaloisFieldMulMaskedUint8x16
OpGaloisFieldMulMaskedUint8x32
OpGaloisFieldMulMaskedUint8x64
OpMinUint64x2
OpMinUint64x4
OpMinUint64x8
+ OpMulAddFloat32x4
+ OpMulAddFloat32x8
+ OpMulAddFloat32x16
+ OpMulAddFloat64x2
+ OpMulAddFloat64x4
+ OpMulAddFloat64x8
+ OpMulAddMaskedFloat32x4
+ OpMulAddMaskedFloat32x8
+ OpMulAddMaskedFloat32x16
+ OpMulAddMaskedFloat64x2
+ OpMulAddMaskedFloat64x4
+ OpMulAddMaskedFloat64x8
+ OpMulAddSubFloat32x4
+ OpMulAddSubFloat32x8
+ OpMulAddSubFloat32x16
+ OpMulAddSubFloat64x2
+ OpMulAddSubFloat64x4
+ OpMulAddSubFloat64x8
+ OpMulAddSubMaskedFloat32x4
+ OpMulAddSubMaskedFloat32x8
+ OpMulAddSubMaskedFloat32x16
+ OpMulAddSubMaskedFloat64x2
+ OpMulAddSubMaskedFloat64x4
+ OpMulAddSubMaskedFloat64x8
OpMulEvenWidenInt32x4
OpMulEvenWidenInt32x8
- OpMulEvenWidenInt64x2
- OpMulEvenWidenInt64x4
- OpMulEvenWidenInt64x8
- OpMulEvenWidenMaskedInt64x2
- OpMulEvenWidenMaskedInt64x4
- OpMulEvenWidenMaskedInt64x8
- OpMulEvenWidenMaskedUint64x2
- OpMulEvenWidenMaskedUint64x4
- OpMulEvenWidenMaskedUint64x8
OpMulEvenWidenUint32x4
OpMulEvenWidenUint32x8
- OpMulEvenWidenUint64x2
- OpMulEvenWidenUint64x4
- OpMulEvenWidenUint64x8
OpMulFloat32x4
OpMulFloat32x8
OpMulFloat32x16
OpMulHighMaskedInt16x8
OpMulHighMaskedInt16x16
OpMulHighMaskedInt16x32
- OpMulHighMaskedUint16x8
- OpMulHighMaskedUint16x16
- OpMulHighMaskedUint16x32
- OpMulHighUint16x8
- OpMulHighUint16x16
- OpMulHighUint16x32
OpMulInt16x8
OpMulInt16x16
OpMulInt16x32
OpMulMaskedInt64x2
OpMulMaskedInt64x4
OpMulMaskedInt64x8
+ OpMulMaskedUint16x8
+ OpMulMaskedUint16x16
+ OpMulMaskedUint16x32
+ OpMulMaskedUint32x4
+ OpMulMaskedUint32x8
+ OpMulMaskedUint32x16
+ OpMulMaskedUint64x2
+ OpMulMaskedUint64x4
+ OpMulMaskedUint64x8
+ OpMulSubAddFloat32x4
+ OpMulSubAddFloat32x8
+ OpMulSubAddFloat32x16
+ OpMulSubAddFloat64x2
+ OpMulSubAddFloat64x4
+ OpMulSubAddFloat64x8
+ OpMulSubAddMaskedFloat32x4
+ OpMulSubAddMaskedFloat32x8
+ OpMulSubAddMaskedFloat32x16
+ OpMulSubAddMaskedFloat64x2
+ OpMulSubAddMaskedFloat64x4
+ OpMulSubAddMaskedFloat64x8
+ OpMulUint16x8
+ OpMulUint16x16
+ OpMulUint16x32
+ OpMulUint32x4
+ OpMulUint32x8
+ OpMulUint32x16
+ OpMulUint64x2
+ OpMulUint64x4
+ OpMulUint64x8
OpNotEqualFloat32x4
OpNotEqualFloat32x8
OpNotEqualFloat32x16
OpNotEqualUint64x2
OpNotEqualUint64x4
OpNotEqualUint64x8
+ OpOnesCountInt8x16
+ OpOnesCountInt8x32
+ OpOnesCountInt8x64
+ OpOnesCountInt16x8
+ OpOnesCountInt16x16
+ OpOnesCountInt16x32
+ OpOnesCountInt32x4
+ OpOnesCountInt32x8
+ OpOnesCountInt32x16
+ OpOnesCountInt64x2
+ OpOnesCountInt64x4
+ OpOnesCountInt64x8
+ OpOnesCountMaskedInt8x16
+ OpOnesCountMaskedInt8x32
+ OpOnesCountMaskedInt8x64
+ OpOnesCountMaskedInt16x8
+ OpOnesCountMaskedInt16x16
+ OpOnesCountMaskedInt16x32
+ OpOnesCountMaskedInt32x4
+ OpOnesCountMaskedInt32x8
+ OpOnesCountMaskedInt32x16
+ OpOnesCountMaskedInt64x2
+ OpOnesCountMaskedInt64x4
+ OpOnesCountMaskedInt64x8
+ OpOnesCountMaskedUint8x16
+ OpOnesCountMaskedUint8x32
+ OpOnesCountMaskedUint8x64
+ OpOnesCountMaskedUint16x8
+ OpOnesCountMaskedUint16x16
+ OpOnesCountMaskedUint16x32
+ OpOnesCountMaskedUint32x4
+ OpOnesCountMaskedUint32x8
+ OpOnesCountMaskedUint32x16
+ OpOnesCountMaskedUint64x2
+ OpOnesCountMaskedUint64x4
+ OpOnesCountMaskedUint64x8
+ OpOnesCountUint8x16
+ OpOnesCountUint8x32
+ OpOnesCountUint8x64
+ OpOnesCountUint16x8
+ OpOnesCountUint16x16
+ OpOnesCountUint16x32
+ OpOnesCountUint32x4
+ OpOnesCountUint32x8
+ OpOnesCountUint32x16
+ OpOnesCountUint64x2
+ OpOnesCountUint64x4
+ OpOnesCountUint64x8
OpOrInt8x16
OpOrInt8x32
OpOrInt8x64
OpOrUint64x2
OpOrUint64x4
OpOrUint64x8
- OpPairDotProdInt16x8
- OpPairDotProdInt16x16
- OpPairDotProdInt16x32
- OpPairDotProdMaskedInt16x8
- OpPairDotProdMaskedInt16x16
- OpPairDotProdMaskedInt16x32
OpPermute2Float32x4
OpPermute2Float32x8
OpPermute2Float32x16
OpPermuteUint32x16
OpPermuteUint64x4
OpPermuteUint64x8
- OpPopCountInt8x16
- OpPopCountInt8x32
- OpPopCountInt8x64
- OpPopCountInt16x8
- OpPopCountInt16x16
- OpPopCountInt16x32
- OpPopCountInt32x4
- OpPopCountInt32x8
- OpPopCountInt32x16
- OpPopCountInt64x2
- OpPopCountInt64x4
- OpPopCountInt64x8
- OpPopCountMaskedInt8x16
- OpPopCountMaskedInt8x32
- OpPopCountMaskedInt8x64
- OpPopCountMaskedInt16x8
- OpPopCountMaskedInt16x16
- OpPopCountMaskedInt16x32
- OpPopCountMaskedInt32x4
- OpPopCountMaskedInt32x8
- OpPopCountMaskedInt32x16
- OpPopCountMaskedInt64x2
- OpPopCountMaskedInt64x4
- OpPopCountMaskedInt64x8
- OpPopCountMaskedUint8x16
- OpPopCountMaskedUint8x32
- OpPopCountMaskedUint8x64
- OpPopCountMaskedUint16x8
- OpPopCountMaskedUint16x16
- OpPopCountMaskedUint16x32
- OpPopCountMaskedUint32x4
- OpPopCountMaskedUint32x8
- OpPopCountMaskedUint32x16
- OpPopCountMaskedUint64x2
- OpPopCountMaskedUint64x4
- OpPopCountMaskedUint64x8
- OpPopCountUint8x16
- OpPopCountUint8x32
- OpPopCountUint8x64
- OpPopCountUint16x8
- OpPopCountUint16x16
- OpPopCountUint16x32
- OpPopCountUint32x4
- OpPopCountUint32x8
- OpPopCountUint32x16
- OpPopCountUint64x2
- OpPopCountUint64x4
- OpPopCountUint64x8
+ OpReciprocalFloat32x4
+ OpReciprocalFloat32x8
+ OpReciprocalFloat32x16
+ OpReciprocalFloat64x2
+ OpReciprocalFloat64x4
+ OpReciprocalFloat64x8
+ OpReciprocalMaskedFloat32x4
+ OpReciprocalMaskedFloat32x8
+ OpReciprocalMaskedFloat32x16
+ OpReciprocalMaskedFloat64x2
+ OpReciprocalMaskedFloat64x4
+ OpReciprocalMaskedFloat64x8
+ OpReciprocalSqrtFloat32x4
+ OpReciprocalSqrtFloat32x8
+ OpReciprocalSqrtFloat32x16
+ OpReciprocalSqrtFloat64x2
+ OpReciprocalSqrtFloat64x4
+ OpReciprocalSqrtFloat64x8
+ OpReciprocalSqrtMaskedFloat32x4
+ OpReciprocalSqrtMaskedFloat32x8
+ OpReciprocalSqrtMaskedFloat32x16
+ OpReciprocalSqrtMaskedFloat64x2
+ OpReciprocalSqrtMaskedFloat64x4
+ OpReciprocalSqrtMaskedFloat64x8
OpRotateLeftInt32x4
OpRotateLeftInt32x8
OpRotateLeftInt32x16
OpRotateRightUint64x2
OpRotateRightUint64x4
OpRotateRightUint64x8
- OpRoundFloat32x4
- OpRoundFloat32x8
- OpRoundFloat64x2
- OpRoundFloat64x4
- OpSaturatedAddDotProdInt32x4
- OpSaturatedAddDotProdInt32x8
- OpSaturatedAddDotProdInt32x16
- OpSaturatedAddDotProdMaskedInt32x4
- OpSaturatedAddDotProdMaskedInt32x8
- OpSaturatedAddDotProdMaskedInt32x16
- OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16
- OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32
- OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64
- OpSaturatedUnsignedSignedPairDotProdUint8x16
- OpSaturatedUnsignedSignedPairDotProdUint8x32
- OpSaturatedUnsignedSignedPairDotProdUint8x64
- OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4
- OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8
- OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16
- OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4
- OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8
- OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16
+ OpRoundToEvenFloat32x4
+ OpRoundToEvenFloat32x8
+ OpRoundToEvenFloat64x2
+ OpRoundToEvenFloat64x4
OpScaleFloat32x4
OpScaleFloat32x8
OpScaleFloat32x16
OpShiftRightUint64x2
OpShiftRightUint64x4
OpShiftRightUint64x8
- OpSignInt8x16
- OpSignInt8x32
- OpSignInt16x8
- OpSignInt16x16
- OpSignInt32x4
- OpSignInt32x8
OpSqrtFloat32x4
OpSqrtFloat32x8
OpSqrtFloat32x16
OpTruncFloat32x8
OpTruncFloat64x2
OpTruncFloat64x4
- OpUnsignedSignedQuadDotProdAccumulateInt32x4
- OpUnsignedSignedQuadDotProdAccumulateInt32x8
- OpUnsignedSignedQuadDotProdAccumulateInt32x16
- OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4
- OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8
- OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16
OpXorInt8x16
OpXorInt8x32
OpXorInt8x64
OpRotateAllRightUint64x2
OpRotateAllRightUint64x4
OpRotateAllRightUint64x8
- OpRoundScaledFloat32x4
- OpRoundScaledFloat32x8
- OpRoundScaledFloat32x16
- OpRoundScaledFloat64x2
- OpRoundScaledFloat64x4
- OpRoundScaledFloat64x8
- OpRoundScaledMaskedFloat32x4
- OpRoundScaledMaskedFloat32x8
- OpRoundScaledMaskedFloat32x16
- OpRoundScaledMaskedFloat64x2
- OpRoundScaledMaskedFloat64x4
- OpRoundScaledMaskedFloat64x8
- OpRoundScaledResidueFloat32x4
- OpRoundScaledResidueFloat32x8
- OpRoundScaledResidueFloat32x16
- OpRoundScaledResidueFloat64x2
- OpRoundScaledResidueFloat64x4
- OpRoundScaledResidueFloat64x8
- OpRoundScaledResidueMaskedFloat32x4
- OpRoundScaledResidueMaskedFloat32x8
- OpRoundScaledResidueMaskedFloat32x16
- OpRoundScaledResidueMaskedFloat64x2
- OpRoundScaledResidueMaskedFloat64x4
- OpRoundScaledResidueMaskedFloat64x8
+ OpRoundToEvenScaledFloat32x4
+ OpRoundToEvenScaledFloat32x8
+ OpRoundToEvenScaledFloat32x16
+ OpRoundToEvenScaledFloat64x2
+ OpRoundToEvenScaledFloat64x4
+ OpRoundToEvenScaledFloat64x8
+ OpRoundToEvenScaledMaskedFloat32x4
+ OpRoundToEvenScaledMaskedFloat32x8
+ OpRoundToEvenScaledMaskedFloat32x16
+ OpRoundToEvenScaledMaskedFloat64x2
+ OpRoundToEvenScaledMaskedFloat64x4
+ OpRoundToEvenScaledMaskedFloat64x8
+ OpRoundToEvenScaledResidueFloat32x4
+ OpRoundToEvenScaledResidueFloat32x8
+ OpRoundToEvenScaledResidueFloat32x16
+ OpRoundToEvenScaledResidueFloat64x2
+ OpRoundToEvenScaledResidueFloat64x4
+ OpRoundToEvenScaledResidueFloat64x8
+ OpRoundToEvenScaledResidueMaskedFloat32x4
+ OpRoundToEvenScaledResidueMaskedFloat32x8
+ OpRoundToEvenScaledResidueMaskedFloat32x16
+ OpRoundToEvenScaledResidueMaskedFloat64x2
+ OpRoundToEvenScaledResidueMaskedFloat64x4
+ OpRoundToEvenScaledResidueMaskedFloat64x8
OpSetElemInt8x16
OpSetElemInt16x8
OpSetElemInt32x4
},
},
},
+ {
+ name: "VPADDUSB128",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPADDUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSB256",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPADDUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSB512",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPADDUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPADDUSBMasked128",
+ argLen: 3,
+ commutative: true,
+ asm: x86.AVPADDUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSBMasked256",
+ argLen: 3,
+ commutative: true,
+ asm: x86.AVPADDUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSBMasked512",
+ argLen: 3,
+ commutative: true,
+ asm: x86.AVPADDUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSW128",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPADDUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSW256",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPADDUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSW512",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPADDUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPADDUSWMasked128",
+ argLen: 3,
+ commutative: true,
+ asm: x86.AVPADDUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSWMasked256",
+ argLen: 3,
+ commutative: true,
+ asm: x86.AVPADDUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPADDUSWMasked512",
+ argLen: 3,
+ commutative: true,
+ asm: x86.AVPADDUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPADDW128",
argLen: 2,
},
},
},
- {
- name: "VPMULDQ512",
- argLen: 2,
- commutative: true,
- asm: x86.AVPMULDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPMULDQMasked128",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPMULDQMasked256",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPMULDQMasked512",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
{
name: "VPMULHUW128",
argLen: 2,
},
},
},
- {
- name: "VPMULHUW512",
- argLen: 2,
- commutative: true,
- asm: x86.AVPMULHUW,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
{
name: "VPMULHUWMasked128",
argLen: 3,
},
},
},
- {
- name: "VPMULHUWMasked256",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULHUW,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
{
name: "VPMULHUWMasked512",
argLen: 3,
},
},
},
- {
- name: "VPMULHW128",
- argLen: 2,
- commutative: true,
- asm: x86.AVPMULHW,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPMULHW256",
- argLen: 2,
- commutative: true,
- asm: x86.AVPMULHW,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
{
name: "VPMULHW512",
argLen: 2,
},
},
},
- {
- name: "VPMULHWMasked128",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULHW,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
{
name: "VPMULHWMasked256",
argLen: 3,
},
},
},
- {
- name: "VPMULHWMasked512",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULHW,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
{
name: "VPMULLD128",
argLen: 2,
},
},
},
- {
- name: "VPMULUDQ512",
- argLen: 2,
- commutative: true,
- asm: x86.AVPMULUDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPMULUDQMasked128",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULUDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPMULUDQMasked256",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULUDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPMULUDQMasked512",
- argLen: 3,
- commutative: true,
- asm: x86.AVPMULUDQ,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
{
name: "VPOPCNTB128",
argLen: 1,
},
},
},
+ {
+ name: "VPSUBUSB128",
+ argLen: 2,
+ asm: x86.AVPSUBUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSB256",
+ argLen: 2,
+ asm: x86.AVPSUBUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSB512",
+ argLen: 2,
+ asm: x86.AVPSUBUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPSUBUSBMasked128",
+ argLen: 3,
+ asm: x86.AVPSUBUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSBMasked256",
+ argLen: 3,
+ asm: x86.AVPSUBUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSBMasked512",
+ argLen: 3,
+ asm: x86.AVPSUBUSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSW128",
+ argLen: 2,
+ asm: x86.AVPSUBUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSW256",
+ argLen: 2,
+ asm: x86.AVPSUBUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSW512",
+ argLen: 2,
+ asm: x86.AVPSUBUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPSUBUSWMasked128",
+ argLen: 3,
+ asm: x86.AVPSUBUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSWMasked256",
+ argLen: 3,
+ asm: x86.AVPSUBUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VPSUBUSWMasked512",
+ argLen: 3,
+ asm: x86.AVPSUBUSW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPSUBW128",
argLen: 2,
},
},
},
- {
- name: "VDPPS128",
- auxType: auxInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVDPPS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VDPPS256",
- auxType: auxInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVDPPS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VDPPD128",
- auxType: auxInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVDPPD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
{
name: "VCMPPS128",
auxType: auxInt8,
generic: true,
},
{
- name: "AbsoluteInt8x16",
+ name: "AbsInt8x16",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt8x32",
+ name: "AbsInt8x32",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt8x64",
+ name: "AbsInt8x64",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt16x8",
+ name: "AbsInt16x8",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt16x16",
+ name: "AbsInt16x16",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt16x32",
+ name: "AbsInt16x32",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt32x4",
+ name: "AbsInt32x4",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt32x8",
+ name: "AbsInt32x8",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt32x16",
+ name: "AbsInt32x16",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt64x2",
+ name: "AbsInt64x2",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt64x4",
+ name: "AbsInt64x4",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteInt64x8",
+ name: "AbsInt64x8",
argLen: 1,
generic: true,
},
{
- name: "AbsoluteMaskedInt8x16",
+ name: "AbsMaskedInt8x16",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt8x32",
+ name: "AbsMaskedInt8x32",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt8x64",
+ name: "AbsMaskedInt8x64",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt16x8",
+ name: "AbsMaskedInt16x8",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt16x16",
+ name: "AbsMaskedInt16x16",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt16x32",
+ name: "AbsMaskedInt16x32",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt32x4",
+ name: "AbsMaskedInt32x4",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt32x8",
+ name: "AbsMaskedInt32x8",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt32x16",
+ name: "AbsMaskedInt32x16",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt64x2",
+ name: "AbsMaskedInt64x2",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt64x4",
+ name: "AbsMaskedInt64x4",
argLen: 2,
generic: true,
},
{
- name: "AbsoluteMaskedInt64x8",
+ name: "AbsMaskedInt64x8",
argLen: 2,
generic: true,
},
{
- name: "AddDotProdInt32x4",
+ name: "AddDotProdPairsSaturatedInt32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "AddDotProdPairsSaturatedInt32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "AddDotProdPairsSaturatedInt32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "AddDotProdPairsSaturatedMaskedInt32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "AddDotProdPairsSaturatedMaskedInt32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "AddDotProdPairsSaturatedMaskedInt32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "AddDotProdQuadrupleInt32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "AddDotProdQuadrupleInt32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "AddDotProdQuadrupleInt32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "AddDotProdQuadrupleMaskedInt32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "AddDotProdQuadrupleMaskedInt32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "AddDotProdQuadrupleMaskedInt32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "AddDotProdQuadrupleSaturatedInt32x4",
argLen: 3,
generic: true,
},
{
- name: "AddDotProdInt32x8",
+ name: "AddDotProdQuadrupleSaturatedInt32x8",
argLen: 3,
generic: true,
},
{
- name: "AddDotProdInt32x16",
+ name: "AddDotProdQuadrupleSaturatedInt32x16",
argLen: 3,
generic: true,
},
{
- name: "AddDotProdMaskedInt32x4",
+ name: "AddDotProdQuadrupleSaturatedMaskedInt32x4",
argLen: 4,
generic: true,
},
{
- name: "AddDotProdMaskedInt32x8",
+ name: "AddDotProdQuadrupleSaturatedMaskedInt32x8",
argLen: 4,
generic: true,
},
{
- name: "AddDotProdMaskedInt32x16",
+ name: "AddDotProdQuadrupleSaturatedMaskedInt32x16",
argLen: 4,
generic: true,
},
commutative: true,
generic: true,
},
- {
- name: "ApproximateReciprocalFloat32x4",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalFloat32x8",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalFloat32x16",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalFloat64x2",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalFloat64x4",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalFloat64x8",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalMaskedFloat32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalMaskedFloat32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalMaskedFloat32x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalMaskedFloat64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalMaskedFloat64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalMaskedFloat64x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtFloat32x4",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtFloat32x8",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtFloat32x16",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtFloat64x2",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtFloat64x4",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtFloat64x8",
- argLen: 1,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtMaskedFloat32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtMaskedFloat32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtMaskedFloat32x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtMaskedFloat64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtMaskedFloat64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "ApproximateReciprocalOfSqrtMaskedFloat64x8",
- argLen: 2,
- generic: true,
- },
{
name: "AverageMaskedUint8x16",
argLen: 3,
argLen: 2,
generic: true,
},
+ {
+ name: "CopySignInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "CopySignInt8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "CopySignInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "CopySignInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "CopySignInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "CopySignInt32x8",
+ argLen: 2,
+ generic: true,
+ },
{
name: "DivFloat32x4",
argLen: 2,
generic: true,
},
{
- name: "DotProdBroadcastFloat32x4",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "DotProdPairsInt16x8",
+ argLen: 2,
+ generic: true,
},
{
- name: "DotProdBroadcastFloat32x8",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "DotProdPairsInt16x16",
+ argLen: 2,
+ generic: true,
},
{
- name: "DotProdBroadcastFloat64x2",
- argLen: 2,
- commutative: true,
- generic: true,
+ name: "DotProdPairsInt16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsMaskedInt16x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsMaskedInt16x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsMaskedInt16x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsSaturatedMaskedUint8x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsSaturatedMaskedUint8x32",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsSaturatedMaskedUint8x64",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsSaturatedUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsSaturatedUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "DotProdPairsSaturatedUint8x64",
+ argLen: 2,
+ generic: true,
},
{
name: "EqualFloat32x4",
argLen: 1,
generic: true,
},
- {
- name: "FusedMultiplyAddFloat32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddFloat32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddFloat32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddFloat64x2",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddFloat64x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddFloat64x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddMaskedFloat32x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddMaskedFloat32x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddMaskedFloat32x16",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddMaskedFloat64x2",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddMaskedFloat64x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddMaskedFloat64x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubFloat32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubFloat32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubFloat32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubFloat64x2",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubFloat64x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubFloat64x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubMaskedFloat32x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubMaskedFloat32x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubMaskedFloat32x16",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubMaskedFloat64x2",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubMaskedFloat64x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplyAddSubMaskedFloat64x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddFloat32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddFloat32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddFloat32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddFloat64x2",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddFloat64x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddFloat64x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddMaskedFloat32x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddMaskedFloat32x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddMaskedFloat32x16",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddMaskedFloat64x2",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddMaskedFloat64x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "FusedMultiplySubAddMaskedFloat64x8",
- argLen: 4,
- generic: true,
- },
{
name: "GaloisFieldMulMaskedUint8x16",
argLen: 3,
commutative: true,
generic: true,
},
+ {
+ name: "MulAddFloat32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddFloat32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddFloat32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddFloat64x2",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddFloat64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddFloat64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddMaskedFloat32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddMaskedFloat32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddMaskedFloat32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddMaskedFloat64x2",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddMaskedFloat64x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddMaskedFloat64x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddSubFloat32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddSubFloat32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddSubFloat32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddSubFloat64x2",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddSubFloat64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddSubFloat64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulAddSubMaskedFloat32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddSubMaskedFloat32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddSubMaskedFloat32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddSubMaskedFloat64x2",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddSubMaskedFloat64x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulAddSubMaskedFloat64x8",
+ argLen: 4,
+ generic: true,
+ },
{
name: "MulEvenWidenInt32x4",
argLen: 2,
generic: true,
},
{
- name: "MulEvenWidenInt64x2",
+ name: "MulEvenWidenUint32x4",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenInt64x4",
+ name: "MulEvenWidenUint32x8",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenInt64x8",
+ name: "MulFloat32x4",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenMaskedInt64x2",
- argLen: 3,
+ name: "MulFloat32x8",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenMaskedInt64x4",
- argLen: 3,
+ name: "MulFloat32x16",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenMaskedInt64x8",
- argLen: 3,
+ name: "MulFloat64x2",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenMaskedUint64x2",
- argLen: 3,
+ name: "MulFloat64x4",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenMaskedUint64x4",
- argLen: 3,
+ name: "MulFloat64x8",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenMaskedUint64x8",
- argLen: 3,
+ name: "MulHighInt16x8",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenUint32x4",
+ name: "MulHighInt16x16",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenUint32x8",
+ name: "MulHighInt16x32",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenUint64x2",
- argLen: 2,
+ name: "MulHighMaskedInt16x8",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenUint64x4",
- argLen: 2,
+ name: "MulHighMaskedInt16x16",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulEvenWidenUint64x8",
- argLen: 2,
+ name: "MulHighMaskedInt16x32",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulFloat32x4",
+ name: "MulInt16x8",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulFloat32x8",
+ name: "MulInt16x16",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulFloat32x16",
+ name: "MulInt16x32",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulFloat64x2",
+ name: "MulInt32x4",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulFloat64x4",
+ name: "MulInt32x8",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulFloat64x8",
+ name: "MulInt32x16",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulHighInt16x8",
+ name: "MulInt64x2",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulHighInt16x16",
+ name: "MulInt64x4",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulHighInt16x32",
+ name: "MulInt64x8",
argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulHighMaskedInt16x8",
+ name: "MulMaskedFloat32x4",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighMaskedInt16x16",
+ name: "MulMaskedFloat32x8",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighMaskedInt16x32",
+ name: "MulMaskedFloat32x16",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighMaskedUint16x8",
+ name: "MulMaskedFloat64x2",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighMaskedUint16x16",
+ name: "MulMaskedFloat64x4",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighMaskedUint16x32",
+ name: "MulMaskedFloat64x8",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighUint16x8",
- argLen: 2,
+ name: "MulMaskedInt16x8",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighUint16x16",
- argLen: 2,
+ name: "MulMaskedInt16x16",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulHighUint16x32",
- argLen: 2,
+ name: "MulMaskedInt16x32",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt16x8",
- argLen: 2,
+ name: "MulMaskedInt32x4",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt16x16",
- argLen: 2,
+ name: "MulMaskedInt32x8",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt16x32",
- argLen: 2,
+ name: "MulMaskedInt32x16",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt32x4",
- argLen: 2,
+ name: "MulMaskedInt64x2",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt32x8",
- argLen: 2,
+ name: "MulMaskedInt64x4",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt32x16",
- argLen: 2,
+ name: "MulMaskedInt64x8",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt64x2",
- argLen: 2,
+ name: "MulMaskedUint16x8",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt64x4",
- argLen: 2,
+ name: "MulMaskedUint16x16",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulInt64x8",
- argLen: 2,
+ name: "MulMaskedUint16x32",
+ argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulMaskedFloat32x4",
+ name: "MulMaskedUint32x4",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulMaskedFloat32x8",
+ name: "MulMaskedUint32x8",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulMaskedFloat32x16",
+ name: "MulMaskedUint32x16",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulMaskedFloat64x2",
+ name: "MulMaskedUint64x2",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulMaskedFloat64x4",
+ name: "MulMaskedUint64x4",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulMaskedFloat64x8",
+ name: "MulMaskedUint64x8",
argLen: 3,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt16x8",
- argLen: 3,
+ name: "MulSubAddFloat32x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulSubAddFloat32x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulSubAddFloat32x16",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulSubAddFloat64x2",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulSubAddFloat64x4",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulSubAddFloat64x8",
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "MulSubAddMaskedFloat32x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulSubAddMaskedFloat32x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulSubAddMaskedFloat32x16",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulSubAddMaskedFloat64x2",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulSubAddMaskedFloat64x4",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulSubAddMaskedFloat64x8",
+ argLen: 4,
+ generic: true,
+ },
+ {
+ name: "MulUint16x8",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt16x16",
- argLen: 3,
+ name: "MulUint16x16",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt16x32",
- argLen: 3,
+ name: "MulUint16x32",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt32x4",
- argLen: 3,
+ name: "MulUint32x4",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt32x8",
- argLen: 3,
+ name: "MulUint32x8",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt32x16",
- argLen: 3,
+ name: "MulUint32x16",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt64x2",
- argLen: 3,
+ name: "MulUint64x2",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt64x4",
- argLen: 3,
+ name: "MulUint64x4",
+ argLen: 2,
commutative: true,
generic: true,
},
{
- name: "MulMaskedInt64x8",
- argLen: 3,
+ name: "MulUint64x8",
+ argLen: 2,
commutative: true,
generic: true,
},
commutative: true,
generic: true,
},
+ {
+ name: "OnesCountInt8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt8x32",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt8x64",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt16x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt16x32",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt32x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt32x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt64x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountInt64x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedInt64x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint8x64",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint16x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint16x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint16x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint32x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint32x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint32x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint64x2",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint64x4",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountMaskedUint64x8",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint8x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint8x32",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint8x64",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint16x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint16x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint16x32",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint32x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint32x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint64x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "OnesCountUint64x8",
+ argLen: 1,
+ generic: true,
+ },
{
name: "OrInt8x16",
argLen: 2,
commutative: true,
generic: true,
},
- {
- name: "PairDotProdInt16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairDotProdInt16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairDotProdInt16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "PairDotProdMaskedInt16x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "PairDotProdMaskedInt16x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "PairDotProdMaskedInt16x32",
- argLen: 3,
- generic: true,
- },
{
name: "Permute2Float32x4",
argLen: 3,
generic: true,
},
{
- name: "PopCountInt8x16",
- argLen: 1,
- generic: true,
- },
- {
- name: "PopCountInt8x32",
- argLen: 1,
- generic: true,
- },
- {
- name: "PopCountInt8x64",
- argLen: 1,
- generic: true,
- },
- {
- name: "PopCountInt16x8",
- argLen: 1,
- generic: true,
- },
- {
- name: "PopCountInt16x16",
- argLen: 1,
- generic: true,
- },
- {
- name: "PopCountInt16x32",
+ name: "ReciprocalFloat32x4",
argLen: 1,
generic: true,
},
{
- name: "PopCountInt32x4",
+ name: "ReciprocalFloat32x8",
argLen: 1,
generic: true,
},
{
- name: "PopCountInt32x8",
+ name: "ReciprocalFloat32x16",
argLen: 1,
generic: true,
},
{
- name: "PopCountInt32x16",
+ name: "ReciprocalFloat64x2",
argLen: 1,
generic: true,
},
{
- name: "PopCountInt64x2",
+ name: "ReciprocalFloat64x4",
argLen: 1,
generic: true,
},
{
- name: "PopCountInt64x4",
+ name: "ReciprocalFloat64x8",
argLen: 1,
generic: true,
},
{
- name: "PopCountInt64x8",
- argLen: 1,
- generic: true,
- },
- {
- name: "PopCountMaskedInt8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedInt8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedInt8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedInt16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedInt16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedInt16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedInt32x4",
+ name: "ReciprocalMaskedFloat32x4",
argLen: 2,
generic: true,
},
{
- name: "PopCountMaskedInt32x8",
+ name: "ReciprocalMaskedFloat32x8",
argLen: 2,
generic: true,
},
{
- name: "PopCountMaskedInt32x16",
+ name: "ReciprocalMaskedFloat32x16",
argLen: 2,
generic: true,
},
{
- name: "PopCountMaskedInt64x2",
+ name: "ReciprocalMaskedFloat64x2",
argLen: 2,
generic: true,
},
{
- name: "PopCountMaskedInt64x4",
+ name: "ReciprocalMaskedFloat64x4",
argLen: 2,
generic: true,
},
{
- name: "PopCountMaskedInt64x8",
+ name: "ReciprocalMaskedFloat64x8",
argLen: 2,
generic: true,
},
{
- name: "PopCountMaskedUint8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint32x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountMaskedUint64x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "PopCountUint8x16",
+ name: "ReciprocalSqrtFloat32x4",
argLen: 1,
generic: true,
},
{
- name: "PopCountUint8x32",
+ name: "ReciprocalSqrtFloat32x8",
argLen: 1,
generic: true,
},
{
- name: "PopCountUint8x64",
+ name: "ReciprocalSqrtFloat32x16",
argLen: 1,
generic: true,
},
{
- name: "PopCountUint16x8",
+ name: "ReciprocalSqrtFloat64x2",
argLen: 1,
generic: true,
},
{
- name: "PopCountUint16x16",
+ name: "ReciprocalSqrtFloat64x4",
argLen: 1,
generic: true,
},
{
- name: "PopCountUint16x32",
+ name: "ReciprocalSqrtFloat64x8",
argLen: 1,
generic: true,
},
{
- name: "PopCountUint32x4",
- argLen: 1,
+ name: "ReciprocalSqrtMaskedFloat32x4",
+ argLen: 2,
generic: true,
},
{
- name: "PopCountUint32x8",
- argLen: 1,
+ name: "ReciprocalSqrtMaskedFloat32x8",
+ argLen: 2,
generic: true,
},
{
- name: "PopCountUint32x16",
- argLen: 1,
+ name: "ReciprocalSqrtMaskedFloat32x16",
+ argLen: 2,
generic: true,
},
{
- name: "PopCountUint64x2",
- argLen: 1,
+ name: "ReciprocalSqrtMaskedFloat64x2",
+ argLen: 2,
generic: true,
},
{
- name: "PopCountUint64x4",
- argLen: 1,
+ name: "ReciprocalSqrtMaskedFloat64x4",
+ argLen: 2,
generic: true,
},
{
- name: "PopCountUint64x8",
- argLen: 1,
+ name: "ReciprocalSqrtMaskedFloat64x8",
+ argLen: 2,
generic: true,
},
{
generic: true,
},
{
- name: "RoundFloat32x4",
+ name: "RoundToEvenFloat32x4",
argLen: 1,
generic: true,
},
{
- name: "RoundFloat32x8",
+ name: "RoundToEvenFloat32x8",
argLen: 1,
generic: true,
},
{
- name: "RoundFloat64x2",
+ name: "RoundToEvenFloat64x2",
argLen: 1,
generic: true,
},
{
- name: "RoundFloat64x4",
+ name: "RoundToEvenFloat64x4",
argLen: 1,
generic: true,
},
- {
- name: "SaturatedAddDotProdInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedAddDotProdInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedAddDotProdInt32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedAddDotProdMaskedInt32x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "SaturatedAddDotProdMaskedInt32x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "SaturatedAddDotProdMaskedInt32x16",
- argLen: 4,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdUint8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdUint8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedPairDotProdUint8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16",
- argLen: 4,
- generic: true,
- },
{
name: "ScaleFloat32x4",
argLen: 2,
generic: true,
},
- {
- name: "SignInt8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "SignInt8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "SignInt16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "SignInt16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "SignInt32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "SignInt32x8",
- argLen: 2,
- generic: true,
- },
{
name: "SqrtFloat32x4",
argLen: 1,
generic: true,
},
- {
- name: "UnsignedSignedQuadDotProdAccumulateInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "UnsignedSignedQuadDotProdAccumulateInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "UnsignedSignedQuadDotProdAccumulateInt32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x4",
- argLen: 4,
- generic: true,
- },
- {
- name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x8",
- argLen: 4,
- generic: true,
- },
- {
- name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x16",
- argLen: 4,
- generic: true,
- },
{
name: "XorInt8x16",
argLen: 2,
generic: true,
},
{
- name: "RoundScaledFloat32x4",
+ name: "RoundToEvenScaledFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledFloat32x8",
+ name: "RoundToEvenScaledFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledFloat32x16",
+ name: "RoundToEvenScaledFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledFloat64x2",
+ name: "RoundToEvenScaledFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledFloat64x4",
+ name: "RoundToEvenScaledFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledFloat64x8",
+ name: "RoundToEvenScaledFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledMaskedFloat32x4",
+ name: "RoundToEvenScaledMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledMaskedFloat32x8",
+ name: "RoundToEvenScaledMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledMaskedFloat32x16",
+ name: "RoundToEvenScaledMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledMaskedFloat64x2",
+ name: "RoundToEvenScaledMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledMaskedFloat64x4",
+ name: "RoundToEvenScaledMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledMaskedFloat64x8",
+ name: "RoundToEvenScaledMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledResidueFloat32x4",
+ name: "RoundToEvenScaledResidueFloat32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledResidueFloat32x8",
+ name: "RoundToEvenScaledResidueFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledResidueFloat32x16",
+ name: "RoundToEvenScaledResidueFloat32x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledResidueFloat64x2",
+ name: "RoundToEvenScaledResidueFloat64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledResidueFloat64x4",
+ name: "RoundToEvenScaledResidueFloat64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledResidueFloat64x8",
+ name: "RoundToEvenScaledResidueFloat64x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
- name: "RoundScaledResidueMaskedFloat32x4",
+ name: "RoundToEvenScaledResidueMaskedFloat32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledResidueMaskedFloat32x8",
+ name: "RoundToEvenScaledResidueMaskedFloat32x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledResidueMaskedFloat32x16",
+ name: "RoundToEvenScaledResidueMaskedFloat32x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledResidueMaskedFloat64x2",
+ name: "RoundToEvenScaledResidueMaskedFloat64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledResidueMaskedFloat64x4",
+ name: "RoundToEvenScaledResidueMaskedFloat64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
- name: "RoundScaledResidueMaskedFloat64x8",
+ name: "RoundToEvenScaledResidueMaskedFloat64x8",
auxType: auxInt8,
argLen: 2,
generic: true,
return rewriteValueAMD64_OpAMD64XORQload(v)
case OpAMD64XORQmodify:
return rewriteValueAMD64_OpAMD64XORQmodify(v)
- case OpAbsoluteInt16x16:
+ case OpAbsInt16x16:
v.Op = OpAMD64VPABSW256
return true
- case OpAbsoluteInt16x32:
+ case OpAbsInt16x32:
v.Op = OpAMD64VPABSW512
return true
- case OpAbsoluteInt16x8:
+ case OpAbsInt16x8:
v.Op = OpAMD64VPABSW128
return true
- case OpAbsoluteInt32x16:
+ case OpAbsInt32x16:
v.Op = OpAMD64VPABSD512
return true
- case OpAbsoluteInt32x4:
+ case OpAbsInt32x4:
v.Op = OpAMD64VPABSD128
return true
- case OpAbsoluteInt32x8:
+ case OpAbsInt32x8:
v.Op = OpAMD64VPABSD256
return true
- case OpAbsoluteInt64x2:
+ case OpAbsInt64x2:
v.Op = OpAMD64VPABSQ128
return true
- case OpAbsoluteInt64x4:
+ case OpAbsInt64x4:
v.Op = OpAMD64VPABSQ256
return true
- case OpAbsoluteInt64x8:
+ case OpAbsInt64x8:
v.Op = OpAMD64VPABSQ512
return true
- case OpAbsoluteInt8x16:
+ case OpAbsInt8x16:
v.Op = OpAMD64VPABSB128
return true
- case OpAbsoluteInt8x32:
+ case OpAbsInt8x32:
v.Op = OpAMD64VPABSB256
return true
- case OpAbsoluteInt8x64:
+ case OpAbsInt8x64:
v.Op = OpAMD64VPABSB512
return true
- case OpAbsoluteMaskedInt16x16:
- return rewriteValueAMD64_OpAbsoluteMaskedInt16x16(v)
- case OpAbsoluteMaskedInt16x32:
- return rewriteValueAMD64_OpAbsoluteMaskedInt16x32(v)
- case OpAbsoluteMaskedInt16x8:
- return rewriteValueAMD64_OpAbsoluteMaskedInt16x8(v)
- case OpAbsoluteMaskedInt32x16:
- return rewriteValueAMD64_OpAbsoluteMaskedInt32x16(v)
- case OpAbsoluteMaskedInt32x4:
- return rewriteValueAMD64_OpAbsoluteMaskedInt32x4(v)
- case OpAbsoluteMaskedInt32x8:
- return rewriteValueAMD64_OpAbsoluteMaskedInt32x8(v)
- case OpAbsoluteMaskedInt64x2:
- return rewriteValueAMD64_OpAbsoluteMaskedInt64x2(v)
- case OpAbsoluteMaskedInt64x4:
- return rewriteValueAMD64_OpAbsoluteMaskedInt64x4(v)
- case OpAbsoluteMaskedInt64x8:
- return rewriteValueAMD64_OpAbsoluteMaskedInt64x8(v)
- case OpAbsoluteMaskedInt8x16:
- return rewriteValueAMD64_OpAbsoluteMaskedInt8x16(v)
- case OpAbsoluteMaskedInt8x32:
- return rewriteValueAMD64_OpAbsoluteMaskedInt8x32(v)
- case OpAbsoluteMaskedInt8x64:
- return rewriteValueAMD64_OpAbsoluteMaskedInt8x64(v)
+ case OpAbsMaskedInt16x16:
+ return rewriteValueAMD64_OpAbsMaskedInt16x16(v)
+ case OpAbsMaskedInt16x32:
+ return rewriteValueAMD64_OpAbsMaskedInt16x32(v)
+ case OpAbsMaskedInt16x8:
+ return rewriteValueAMD64_OpAbsMaskedInt16x8(v)
+ case OpAbsMaskedInt32x16:
+ return rewriteValueAMD64_OpAbsMaskedInt32x16(v)
+ case OpAbsMaskedInt32x4:
+ return rewriteValueAMD64_OpAbsMaskedInt32x4(v)
+ case OpAbsMaskedInt32x8:
+ return rewriteValueAMD64_OpAbsMaskedInt32x8(v)
+ case OpAbsMaskedInt64x2:
+ return rewriteValueAMD64_OpAbsMaskedInt64x2(v)
+ case OpAbsMaskedInt64x4:
+ return rewriteValueAMD64_OpAbsMaskedInt64x4(v)
+ case OpAbsMaskedInt64x8:
+ return rewriteValueAMD64_OpAbsMaskedInt64x8(v)
+ case OpAbsMaskedInt8x16:
+ return rewriteValueAMD64_OpAbsMaskedInt8x16(v)
+ case OpAbsMaskedInt8x32:
+ return rewriteValueAMD64_OpAbsMaskedInt8x32(v)
+ case OpAbsMaskedInt8x64:
+ return rewriteValueAMD64_OpAbsMaskedInt8x64(v)
case OpAdd16:
v.Op = OpAMD64ADDL
return true
case OpAdd8:
v.Op = OpAMD64ADDL
return true
- case OpAddDotProdInt32x16:
- v.Op = OpAMD64VPDPWSSD512
+ case OpAddDotProdPairsSaturatedInt32x16:
+ v.Op = OpAMD64VPDPWSSDS512
+ return true
+ case OpAddDotProdPairsSaturatedInt32x4:
+ v.Op = OpAMD64VPDPWSSDS128
+ return true
+ case OpAddDotProdPairsSaturatedInt32x8:
+ v.Op = OpAMD64VPDPWSSDS256
+ return true
+ case OpAddDotProdPairsSaturatedMaskedInt32x16:
+ return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v)
+ case OpAddDotProdPairsSaturatedMaskedInt32x4:
+ return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v)
+ case OpAddDotProdPairsSaturatedMaskedInt32x8:
+ return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v)
+ case OpAddDotProdQuadrupleInt32x16:
+ v.Op = OpAMD64VPDPBUSD512
+ return true
+ case OpAddDotProdQuadrupleInt32x4:
+ v.Op = OpAMD64VPDPBUSD128
return true
- case OpAddDotProdInt32x4:
- v.Op = OpAMD64VPDPWSSD128
+ case OpAddDotProdQuadrupleInt32x8:
+ v.Op = OpAMD64VPDPBUSD256
return true
- case OpAddDotProdInt32x8:
- v.Op = OpAMD64VPDPWSSD256
+ case OpAddDotProdQuadrupleMaskedInt32x16:
+ return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v)
+ case OpAddDotProdQuadrupleMaskedInt32x4:
+ return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v)
+ case OpAddDotProdQuadrupleMaskedInt32x8:
+ return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v)
+ case OpAddDotProdQuadrupleSaturatedInt32x16:
+ v.Op = OpAMD64VPDPBUSDS512
return true
- case OpAddDotProdMaskedInt32x16:
- return rewriteValueAMD64_OpAddDotProdMaskedInt32x16(v)
- case OpAddDotProdMaskedInt32x4:
- return rewriteValueAMD64_OpAddDotProdMaskedInt32x4(v)
- case OpAddDotProdMaskedInt32x8:
- return rewriteValueAMD64_OpAddDotProdMaskedInt32x8(v)
+ case OpAddDotProdQuadrupleSaturatedInt32x4:
+ v.Op = OpAMD64VPDPBUSDS128
+ return true
+ case OpAddDotProdQuadrupleSaturatedInt32x8:
+ v.Op = OpAMD64VPDPBUSDS256
+ return true
+ case OpAddDotProdQuadrupleSaturatedMaskedInt32x16:
+ return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v)
+ case OpAddDotProdQuadrupleSaturatedMaskedInt32x4:
+ return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v)
+ case OpAddDotProdQuadrupleSaturatedMaskedInt32x8:
+ return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v)
case OpAddFloat32x16:
v.Op = OpAMD64VADDPS512
return true
case OpAddSaturatedMaskedUint8x64:
return rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v)
case OpAddSaturatedUint16x16:
- v.Op = OpAMD64VPADDSW256
+ v.Op = OpAMD64VPADDUSW256
return true
case OpAddSaturatedUint16x32:
- v.Op = OpAMD64VPADDSW512
+ v.Op = OpAMD64VPADDUSW512
return true
case OpAddSaturatedUint16x8:
- v.Op = OpAMD64VPADDSW128
+ v.Op = OpAMD64VPADDUSW128
return true
case OpAddSaturatedUint8x16:
- v.Op = OpAMD64VPADDSB128
+ v.Op = OpAMD64VPADDUSB128
return true
case OpAddSaturatedUint8x32:
- v.Op = OpAMD64VPADDSB256
+ v.Op = OpAMD64VPADDUSB256
return true
case OpAddSaturatedUint8x64:
- v.Op = OpAMD64VPADDSB512
+ v.Op = OpAMD64VPADDUSB512
return true
case OpAddSubFloat32x4:
v.Op = OpAMD64VADDSUBPS128
case OpAndUint8x64:
v.Op = OpAMD64VPANDD512
return true
- case OpApproximateReciprocalFloat32x16:
- v.Op = OpAMD64VRCP14PS512
- return true
- case OpApproximateReciprocalFloat32x4:
- v.Op = OpAMD64VRCPPS128
- return true
- case OpApproximateReciprocalFloat32x8:
- v.Op = OpAMD64VRCPPS256
- return true
- case OpApproximateReciprocalFloat64x2:
- v.Op = OpAMD64VRCP14PD128
- return true
- case OpApproximateReciprocalFloat64x4:
- v.Op = OpAMD64VRCP14PD256
- return true
- case OpApproximateReciprocalFloat64x8:
- v.Op = OpAMD64VRCP14PD512
- return true
- case OpApproximateReciprocalMaskedFloat32x16:
- return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x16(v)
- case OpApproximateReciprocalMaskedFloat32x4:
- return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x4(v)
- case OpApproximateReciprocalMaskedFloat32x8:
- return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x8(v)
- case OpApproximateReciprocalMaskedFloat64x2:
- return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x2(v)
- case OpApproximateReciprocalMaskedFloat64x4:
- return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x4(v)
- case OpApproximateReciprocalMaskedFloat64x8:
- return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x8(v)
- case OpApproximateReciprocalOfSqrtFloat32x16:
- v.Op = OpAMD64VRSQRT14PS512
- return true
- case OpApproximateReciprocalOfSqrtFloat32x4:
- v.Op = OpAMD64VRSQRTPS128
- return true
- case OpApproximateReciprocalOfSqrtFloat32x8:
- v.Op = OpAMD64VRSQRTPS256
- return true
- case OpApproximateReciprocalOfSqrtFloat64x2:
- v.Op = OpAMD64VRSQRT14PD128
- return true
- case OpApproximateReciprocalOfSqrtFloat64x4:
- v.Op = OpAMD64VRSQRT14PD256
- return true
- case OpApproximateReciprocalOfSqrtFloat64x8:
- v.Op = OpAMD64VRSQRT14PD512
- return true
- case OpApproximateReciprocalOfSqrtMaskedFloat32x16:
- return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x16(v)
- case OpApproximateReciprocalOfSqrtMaskedFloat32x4:
- return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x4(v)
- case OpApproximateReciprocalOfSqrtMaskedFloat32x8:
- return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x8(v)
- case OpApproximateReciprocalOfSqrtMaskedFloat64x2:
- return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x2(v)
- case OpApproximateReciprocalOfSqrtMaskedFloat64x4:
- return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x4(v)
- case OpApproximateReciprocalOfSqrtMaskedFloat64x8:
- return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x8(v)
case OpAtomicAdd32:
return rewriteValueAMD64_OpAtomicAdd32(v)
case OpAtomicAdd64:
return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v)
case OpConvertToUint32MaskedFloat32x8:
return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v)
+ case OpCopySignInt16x16:
+ v.Op = OpAMD64VPSIGNW256
+ return true
+ case OpCopySignInt16x8:
+ v.Op = OpAMD64VPSIGNW128
+ return true
+ case OpCopySignInt32x4:
+ v.Op = OpAMD64VPSIGND128
+ return true
+ case OpCopySignInt32x8:
+ v.Op = OpAMD64VPSIGND256
+ return true
+ case OpCopySignInt8x16:
+ v.Op = OpAMD64VPSIGNB128
+ return true
+ case OpCopySignInt8x32:
+ v.Op = OpAMD64VPSIGNB256
+ return true
case OpCtz16:
return rewriteValueAMD64_OpCtz16(v)
case OpCtz16NonZero:
return rewriteValueAMD64_OpDivMaskedFloat64x4(v)
case OpDivMaskedFloat64x8:
return rewriteValueAMD64_OpDivMaskedFloat64x8(v)
- case OpDotProdBroadcastFloat32x4:
- return rewriteValueAMD64_OpDotProdBroadcastFloat32x4(v)
- case OpDotProdBroadcastFloat32x8:
- return rewriteValueAMD64_OpDotProdBroadcastFloat32x8(v)
- case OpDotProdBroadcastFloat64x2:
- return rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v)
+ case OpDotProdPairsInt16x16:
+ v.Op = OpAMD64VPMADDWD256
+ return true
+ case OpDotProdPairsInt16x32:
+ v.Op = OpAMD64VPMADDWD512
+ return true
+ case OpDotProdPairsInt16x8:
+ v.Op = OpAMD64VPMADDWD128
+ return true
+ case OpDotProdPairsMaskedInt16x16:
+ return rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v)
+ case OpDotProdPairsMaskedInt16x32:
+ return rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v)
+ case OpDotProdPairsMaskedInt16x8:
+ return rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v)
+ case OpDotProdPairsSaturatedMaskedUint8x16:
+ return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v)
+ case OpDotProdPairsSaturatedMaskedUint8x32:
+ return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v)
+ case OpDotProdPairsSaturatedMaskedUint8x64:
+ return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v)
+ case OpDotProdPairsSaturatedUint8x16:
+ v.Op = OpAMD64VPMADDUBSW128
+ return true
+ case OpDotProdPairsSaturatedUint8x32:
+ v.Op = OpAMD64VPMADDUBSW256
+ return true
+ case OpDotProdPairsSaturatedUint8x64:
+ v.Op = OpAMD64VPMADDUBSW512
+ return true
case OpEq16:
return rewriteValueAMD64_OpEq16(v)
case OpEq32:
return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v)
case OpFloorScaledResidueMaskedFloat64x8:
return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v)
- case OpFusedMultiplyAddFloat32x16:
- v.Op = OpAMD64VFMADD213PS512
- return true
- case OpFusedMultiplyAddFloat32x4:
- v.Op = OpAMD64VFMADD213PS128
- return true
- case OpFusedMultiplyAddFloat32x8:
- v.Op = OpAMD64VFMADD213PS256
- return true
- case OpFusedMultiplyAddFloat64x2:
- v.Op = OpAMD64VFMADD213PD128
- return true
- case OpFusedMultiplyAddFloat64x4:
- v.Op = OpAMD64VFMADD213PD256
- return true
- case OpFusedMultiplyAddFloat64x8:
- v.Op = OpAMD64VFMADD213PD512
- return true
- case OpFusedMultiplyAddMaskedFloat32x16:
- return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x16(v)
- case OpFusedMultiplyAddMaskedFloat32x4:
- return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x4(v)
- case OpFusedMultiplyAddMaskedFloat32x8:
- return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x8(v)
- case OpFusedMultiplyAddMaskedFloat64x2:
- return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x2(v)
- case OpFusedMultiplyAddMaskedFloat64x4:
- return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x4(v)
- case OpFusedMultiplyAddMaskedFloat64x8:
- return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x8(v)
- case OpFusedMultiplyAddSubFloat32x16:
- v.Op = OpAMD64VFMADDSUB213PS512
- return true
- case OpFusedMultiplyAddSubFloat32x4:
- v.Op = OpAMD64VFMADDSUB213PS128
- return true
- case OpFusedMultiplyAddSubFloat32x8:
- v.Op = OpAMD64VFMADDSUB213PS256
- return true
- case OpFusedMultiplyAddSubFloat64x2:
- v.Op = OpAMD64VFMADDSUB213PD128
- return true
- case OpFusedMultiplyAddSubFloat64x4:
- v.Op = OpAMD64VFMADDSUB213PD256
- return true
- case OpFusedMultiplyAddSubFloat64x8:
- v.Op = OpAMD64VFMADDSUB213PD512
- return true
- case OpFusedMultiplyAddSubMaskedFloat32x16:
- return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x16(v)
- case OpFusedMultiplyAddSubMaskedFloat32x4:
- return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x4(v)
- case OpFusedMultiplyAddSubMaskedFloat32x8:
- return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x8(v)
- case OpFusedMultiplyAddSubMaskedFloat64x2:
- return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x2(v)
- case OpFusedMultiplyAddSubMaskedFloat64x4:
- return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x4(v)
- case OpFusedMultiplyAddSubMaskedFloat64x8:
- return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x8(v)
- case OpFusedMultiplySubAddFloat32x16:
- v.Op = OpAMD64VFMSUBADD213PS512
- return true
- case OpFusedMultiplySubAddFloat32x4:
- v.Op = OpAMD64VFMSUBADD213PS128
- return true
- case OpFusedMultiplySubAddFloat32x8:
- v.Op = OpAMD64VFMSUBADD213PS256
- return true
- case OpFusedMultiplySubAddFloat64x2:
- v.Op = OpAMD64VFMSUBADD213PD128
- return true
- case OpFusedMultiplySubAddFloat64x4:
- v.Op = OpAMD64VFMSUBADD213PD256
- return true
- case OpFusedMultiplySubAddFloat64x8:
- v.Op = OpAMD64VFMSUBADD213PD512
- return true
- case OpFusedMultiplySubAddMaskedFloat32x16:
- return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x16(v)
- case OpFusedMultiplySubAddMaskedFloat32x4:
- return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x4(v)
- case OpFusedMultiplySubAddMaskedFloat32x8:
- return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x8(v)
- case OpFusedMultiplySubAddMaskedFloat64x2:
- return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x2(v)
- case OpFusedMultiplySubAddMaskedFloat64x4:
- return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x4(v)
- case OpFusedMultiplySubAddMaskedFloat64x8:
- return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x8(v)
case OpGaloisFieldAffineTransformInverseMaskedUint8x16:
return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v)
case OpGaloisFieldAffineTransformInverseMaskedUint8x32:
case OpMul8:
v.Op = OpAMD64MULL
return true
- case OpMulEvenWidenInt32x4:
- v.Op = OpAMD64VPMULDQ128
+ case OpMulAddFloat32x16:
+ v.Op = OpAMD64VFMADD213PS512
return true
- case OpMulEvenWidenInt32x8:
- v.Op = OpAMD64VPMULDQ256
+ case OpMulAddFloat32x4:
+ v.Op = OpAMD64VFMADD213PS128
+ return true
+ case OpMulAddFloat32x8:
+ v.Op = OpAMD64VFMADD213PS256
+ return true
+ case OpMulAddFloat64x2:
+ v.Op = OpAMD64VFMADD213PD128
+ return true
+ case OpMulAddFloat64x4:
+ v.Op = OpAMD64VFMADD213PD256
return true
- case OpMulEvenWidenInt64x2:
+ case OpMulAddFloat64x8:
+ v.Op = OpAMD64VFMADD213PD512
+ return true
+ case OpMulAddMaskedFloat32x16:
+ return rewriteValueAMD64_OpMulAddMaskedFloat32x16(v)
+ case OpMulAddMaskedFloat32x4:
+ return rewriteValueAMD64_OpMulAddMaskedFloat32x4(v)
+ case OpMulAddMaskedFloat32x8:
+ return rewriteValueAMD64_OpMulAddMaskedFloat32x8(v)
+ case OpMulAddMaskedFloat64x2:
+ return rewriteValueAMD64_OpMulAddMaskedFloat64x2(v)
+ case OpMulAddMaskedFloat64x4:
+ return rewriteValueAMD64_OpMulAddMaskedFloat64x4(v)
+ case OpMulAddMaskedFloat64x8:
+ return rewriteValueAMD64_OpMulAddMaskedFloat64x8(v)
+ case OpMulAddSubFloat32x16:
+ v.Op = OpAMD64VFMADDSUB213PS512
+ return true
+ case OpMulAddSubFloat32x4:
+ v.Op = OpAMD64VFMADDSUB213PS128
+ return true
+ case OpMulAddSubFloat32x8:
+ v.Op = OpAMD64VFMADDSUB213PS256
+ return true
+ case OpMulAddSubFloat64x2:
+ v.Op = OpAMD64VFMADDSUB213PD128
+ return true
+ case OpMulAddSubFloat64x4:
+ v.Op = OpAMD64VFMADDSUB213PD256
+ return true
+ case OpMulAddSubFloat64x8:
+ v.Op = OpAMD64VFMADDSUB213PD512
+ return true
+ case OpMulAddSubMaskedFloat32x16:
+ return rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v)
+ case OpMulAddSubMaskedFloat32x4:
+ return rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v)
+ case OpMulAddSubMaskedFloat32x8:
+ return rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v)
+ case OpMulAddSubMaskedFloat64x2:
+ return rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v)
+ case OpMulAddSubMaskedFloat64x4:
+ return rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v)
+ case OpMulAddSubMaskedFloat64x8:
+ return rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v)
+ case OpMulEvenWidenInt32x4:
v.Op = OpAMD64VPMULDQ128
return true
- case OpMulEvenWidenInt64x4:
+ case OpMulEvenWidenInt32x8:
v.Op = OpAMD64VPMULDQ256
return true
- case OpMulEvenWidenInt64x8:
- v.Op = OpAMD64VPMULDQ512
- return true
- case OpMulEvenWidenMaskedInt64x2:
- return rewriteValueAMD64_OpMulEvenWidenMaskedInt64x2(v)
- case OpMulEvenWidenMaskedInt64x4:
- return rewriteValueAMD64_OpMulEvenWidenMaskedInt64x4(v)
- case OpMulEvenWidenMaskedInt64x8:
- return rewriteValueAMD64_OpMulEvenWidenMaskedInt64x8(v)
- case OpMulEvenWidenMaskedUint64x2:
- return rewriteValueAMD64_OpMulEvenWidenMaskedUint64x2(v)
- case OpMulEvenWidenMaskedUint64x4:
- return rewriteValueAMD64_OpMulEvenWidenMaskedUint64x4(v)
- case OpMulEvenWidenMaskedUint64x8:
- return rewriteValueAMD64_OpMulEvenWidenMaskedUint64x8(v)
case OpMulEvenWidenUint32x4:
v.Op = OpAMD64VPMULUDQ128
return true
case OpMulEvenWidenUint32x8:
v.Op = OpAMD64VPMULUDQ256
return true
- case OpMulEvenWidenUint64x2:
- v.Op = OpAMD64VPMULUDQ128
- return true
- case OpMulEvenWidenUint64x4:
- v.Op = OpAMD64VPMULUDQ256
- return true
- case OpMulEvenWidenUint64x8:
- v.Op = OpAMD64VPMULUDQ512
- return true
case OpMulFloat32x16:
v.Op = OpAMD64VMULPS512
return true
v.Op = OpAMD64VMULPD512
return true
case OpMulHighInt16x16:
		v.Op = OpAMD64VPMULHW256
return true
case OpMulHighInt16x32:
v.Op = OpAMD64VPMULHW512
return true
case OpMulHighInt16x8:
		v.Op = OpAMD64VPMULHW128
return true
case OpMulHighMaskedInt16x16:
return rewriteValueAMD64_OpMulHighMaskedInt16x16(v)
return rewriteValueAMD64_OpMulHighMaskedInt16x32(v)
case OpMulHighMaskedInt16x8:
return rewriteValueAMD64_OpMulHighMaskedInt16x8(v)
- case OpMulHighMaskedUint16x16:
- return rewriteValueAMD64_OpMulHighMaskedUint16x16(v)
- case OpMulHighMaskedUint16x32:
- return rewriteValueAMD64_OpMulHighMaskedUint16x32(v)
- case OpMulHighMaskedUint16x8:
- return rewriteValueAMD64_OpMulHighMaskedUint16x8(v)
- case OpMulHighUint16x16:
- v.Op = OpAMD64VPMULHUW256
- return true
- case OpMulHighUint16x32:
- v.Op = OpAMD64VPMULHUW512
- return true
- case OpMulHighUint16x8:
- v.Op = OpAMD64VPMULHUW128
- return true
case OpMulInt16x16:
v.Op = OpAMD64VPMULLW256
return true
return rewriteValueAMD64_OpMulMaskedInt64x4(v)
case OpMulMaskedInt64x8:
return rewriteValueAMD64_OpMulMaskedInt64x8(v)
+ case OpMulMaskedUint16x16:
+ return rewriteValueAMD64_OpMulMaskedUint16x16(v)
+ case OpMulMaskedUint16x32:
+ return rewriteValueAMD64_OpMulMaskedUint16x32(v)
+ case OpMulMaskedUint16x8:
+ return rewriteValueAMD64_OpMulMaskedUint16x8(v)
+ case OpMulMaskedUint32x16:
+ return rewriteValueAMD64_OpMulMaskedUint32x16(v)
+ case OpMulMaskedUint32x4:
+ return rewriteValueAMD64_OpMulMaskedUint32x4(v)
+ case OpMulMaskedUint32x8:
+ return rewriteValueAMD64_OpMulMaskedUint32x8(v)
+ case OpMulMaskedUint64x2:
+ return rewriteValueAMD64_OpMulMaskedUint64x2(v)
+ case OpMulMaskedUint64x4:
+ return rewriteValueAMD64_OpMulMaskedUint64x4(v)
+ case OpMulMaskedUint64x8:
+ return rewriteValueAMD64_OpMulMaskedUint64x8(v)
+ case OpMulSubAddFloat32x16:
+ v.Op = OpAMD64VFMSUBADD213PS512
+ return true
+ case OpMulSubAddFloat32x4:
+ v.Op = OpAMD64VFMSUBADD213PS128
+ return true
+ case OpMulSubAddFloat32x8:
+ v.Op = OpAMD64VFMSUBADD213PS256
+ return true
+ case OpMulSubAddFloat64x2:
+ v.Op = OpAMD64VFMSUBADD213PD128
+ return true
+ case OpMulSubAddFloat64x4:
+ v.Op = OpAMD64VFMSUBADD213PD256
+ return true
+ case OpMulSubAddFloat64x8:
+ v.Op = OpAMD64VFMSUBADD213PD512
+ return true
+ case OpMulSubAddMaskedFloat32x16:
+ return rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v)
+ case OpMulSubAddMaskedFloat32x4:
+ return rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v)
+ case OpMulSubAddMaskedFloat32x8:
+ return rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v)
+ case OpMulSubAddMaskedFloat64x2:
+ return rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v)
+ case OpMulSubAddMaskedFloat64x4:
+ return rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v)
+ case OpMulSubAddMaskedFloat64x8:
+ return rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v)
+ case OpMulUint16x16:
+ v.Op = OpAMD64VPMULLW256
+ return true
+ case OpMulUint16x32:
+ v.Op = OpAMD64VPMULLW512
+ return true
+ case OpMulUint16x8:
+ v.Op = OpAMD64VPMULLW128
+ return true
+ case OpMulUint32x16:
+ v.Op = OpAMD64VPMULLD512
+ return true
+ case OpMulUint32x4:
+ v.Op = OpAMD64VPMULLD128
+ return true
+ case OpMulUint32x8:
+ v.Op = OpAMD64VPMULLD256
+ return true
+ case OpMulUint64x2:
+ v.Op = OpAMD64VPMULLQ128
+ return true
+ case OpMulUint64x4:
+ v.Op = OpAMD64VPMULLQ256
+ return true
+ case OpMulUint64x8:
+ v.Op = OpAMD64VPMULLQ512
+ return true
case OpNeg16:
v.Op = OpAMD64NEGL
return true
return rewriteValueAMD64_OpNotEqualUint8x64(v)
case OpOffPtr:
return rewriteValueAMD64_OpOffPtr(v)
+ case OpOnesCountInt16x16:
+ v.Op = OpAMD64VPOPCNTW256
+ return true
+ case OpOnesCountInt16x32:
+ v.Op = OpAMD64VPOPCNTW512
+ return true
+ case OpOnesCountInt16x8:
+ v.Op = OpAMD64VPOPCNTW128
+ return true
+ case OpOnesCountInt32x16:
+ v.Op = OpAMD64VPOPCNTD512
+ return true
+ case OpOnesCountInt32x4:
+ v.Op = OpAMD64VPOPCNTD128
+ return true
+ case OpOnesCountInt32x8:
+ v.Op = OpAMD64VPOPCNTD256
+ return true
+ case OpOnesCountInt64x2:
+ v.Op = OpAMD64VPOPCNTQ128
+ return true
+ case OpOnesCountInt64x4:
+ v.Op = OpAMD64VPOPCNTQ256
+ return true
+ case OpOnesCountInt64x8:
+ v.Op = OpAMD64VPOPCNTQ512
+ return true
+ case OpOnesCountInt8x16:
+ v.Op = OpAMD64VPOPCNTB128
+ return true
+ case OpOnesCountInt8x32:
+ v.Op = OpAMD64VPOPCNTB256
+ return true
+ case OpOnesCountInt8x64:
+ v.Op = OpAMD64VPOPCNTB512
+ return true
+ case OpOnesCountMaskedInt16x16:
+ return rewriteValueAMD64_OpOnesCountMaskedInt16x16(v)
+ case OpOnesCountMaskedInt16x32:
+ return rewriteValueAMD64_OpOnesCountMaskedInt16x32(v)
+ case OpOnesCountMaskedInt16x8:
+ return rewriteValueAMD64_OpOnesCountMaskedInt16x8(v)
+ case OpOnesCountMaskedInt32x16:
+ return rewriteValueAMD64_OpOnesCountMaskedInt32x16(v)
+ case OpOnesCountMaskedInt32x4:
+ return rewriteValueAMD64_OpOnesCountMaskedInt32x4(v)
+ case OpOnesCountMaskedInt32x8:
+ return rewriteValueAMD64_OpOnesCountMaskedInt32x8(v)
+ case OpOnesCountMaskedInt64x2:
+ return rewriteValueAMD64_OpOnesCountMaskedInt64x2(v)
+ case OpOnesCountMaskedInt64x4:
+ return rewriteValueAMD64_OpOnesCountMaskedInt64x4(v)
+ case OpOnesCountMaskedInt64x8:
+ return rewriteValueAMD64_OpOnesCountMaskedInt64x8(v)
+ case OpOnesCountMaskedInt8x16:
+ return rewriteValueAMD64_OpOnesCountMaskedInt8x16(v)
+ case OpOnesCountMaskedInt8x32:
+ return rewriteValueAMD64_OpOnesCountMaskedInt8x32(v)
+ case OpOnesCountMaskedInt8x64:
+ return rewriteValueAMD64_OpOnesCountMaskedInt8x64(v)
+ case OpOnesCountMaskedUint16x16:
+ return rewriteValueAMD64_OpOnesCountMaskedUint16x16(v)
+ case OpOnesCountMaskedUint16x32:
+ return rewriteValueAMD64_OpOnesCountMaskedUint16x32(v)
+ case OpOnesCountMaskedUint16x8:
+ return rewriteValueAMD64_OpOnesCountMaskedUint16x8(v)
+ case OpOnesCountMaskedUint32x16:
+ return rewriteValueAMD64_OpOnesCountMaskedUint32x16(v)
+ case OpOnesCountMaskedUint32x4:
+ return rewriteValueAMD64_OpOnesCountMaskedUint32x4(v)
+ case OpOnesCountMaskedUint32x8:
+ return rewriteValueAMD64_OpOnesCountMaskedUint32x8(v)
+ case OpOnesCountMaskedUint64x2:
+ return rewriteValueAMD64_OpOnesCountMaskedUint64x2(v)
+ case OpOnesCountMaskedUint64x4:
+ return rewriteValueAMD64_OpOnesCountMaskedUint64x4(v)
+ case OpOnesCountMaskedUint64x8:
+ return rewriteValueAMD64_OpOnesCountMaskedUint64x8(v)
+ case OpOnesCountMaskedUint8x16:
+ return rewriteValueAMD64_OpOnesCountMaskedUint8x16(v)
+ case OpOnesCountMaskedUint8x32:
+ return rewriteValueAMD64_OpOnesCountMaskedUint8x32(v)
+ case OpOnesCountMaskedUint8x64:
+ return rewriteValueAMD64_OpOnesCountMaskedUint8x64(v)
+ case OpOnesCountUint16x16:
+ v.Op = OpAMD64VPOPCNTW256
+ return true
+ case OpOnesCountUint16x32:
+ v.Op = OpAMD64VPOPCNTW512
+ return true
+ case OpOnesCountUint16x8:
+ v.Op = OpAMD64VPOPCNTW128
+ return true
+ case OpOnesCountUint32x16:
+ v.Op = OpAMD64VPOPCNTD512
+ return true
+ case OpOnesCountUint32x4:
+ v.Op = OpAMD64VPOPCNTD128
+ return true
+ case OpOnesCountUint32x8:
+ v.Op = OpAMD64VPOPCNTD256
+ return true
+ case OpOnesCountUint64x2:
+ v.Op = OpAMD64VPOPCNTQ128
+ return true
+ case OpOnesCountUint64x4:
+ v.Op = OpAMD64VPOPCNTQ256
+ return true
+ case OpOnesCountUint64x8:
+ v.Op = OpAMD64VPOPCNTQ512
+ return true
+ case OpOnesCountUint8x16:
+ v.Op = OpAMD64VPOPCNTB128
+ return true
+ case OpOnesCountUint8x32:
+ v.Op = OpAMD64VPOPCNTB256
+ return true
+ case OpOnesCountUint8x64:
+ v.Op = OpAMD64VPOPCNTB512
+ return true
case OpOr16:
v.Op = OpAMD64ORL
return true
case OpOrUint8x64:
v.Op = OpAMD64VPORD512
return true
- case OpPairDotProdInt16x16:
- v.Op = OpAMD64VPMADDWD256
- return true
- case OpPairDotProdInt16x32:
- v.Op = OpAMD64VPMADDWD512
- return true
- case OpPairDotProdInt16x8:
- v.Op = OpAMD64VPMADDWD128
- return true
- case OpPairDotProdMaskedInt16x16:
- return rewriteValueAMD64_OpPairDotProdMaskedInt16x16(v)
- case OpPairDotProdMaskedInt16x32:
- return rewriteValueAMD64_OpPairDotProdMaskedInt16x32(v)
- case OpPairDotProdMaskedInt16x8:
- return rewriteValueAMD64_OpPairDotProdMaskedInt16x8(v)
case OpPanicBounds:
v.Op = OpAMD64LoweredPanicBoundsRR
return true
return true
case OpPopCount8:
return rewriteValueAMD64_OpPopCount8(v)
- case OpPopCountInt16x16:
- v.Op = OpAMD64VPOPCNTW256
- return true
- case OpPopCountInt16x32:
- v.Op = OpAMD64VPOPCNTW512
- return true
- case OpPopCountInt16x8:
- v.Op = OpAMD64VPOPCNTW128
- return true
- case OpPopCountInt32x16:
- v.Op = OpAMD64VPOPCNTD512
- return true
- case OpPopCountInt32x4:
- v.Op = OpAMD64VPOPCNTD128
- return true
- case OpPopCountInt32x8:
- v.Op = OpAMD64VPOPCNTD256
- return true
- case OpPopCountInt64x2:
- v.Op = OpAMD64VPOPCNTQ128
- return true
- case OpPopCountInt64x4:
- v.Op = OpAMD64VPOPCNTQ256
- return true
- case OpPopCountInt64x8:
- v.Op = OpAMD64VPOPCNTQ512
- return true
- case OpPopCountInt8x16:
- v.Op = OpAMD64VPOPCNTB128
- return true
- case OpPopCountInt8x32:
- v.Op = OpAMD64VPOPCNTB256
- return true
- case OpPopCountInt8x64:
- v.Op = OpAMD64VPOPCNTB512
- return true
- case OpPopCountMaskedInt16x16:
- return rewriteValueAMD64_OpPopCountMaskedInt16x16(v)
- case OpPopCountMaskedInt16x32:
- return rewriteValueAMD64_OpPopCountMaskedInt16x32(v)
- case OpPopCountMaskedInt16x8:
- return rewriteValueAMD64_OpPopCountMaskedInt16x8(v)
- case OpPopCountMaskedInt32x16:
- return rewriteValueAMD64_OpPopCountMaskedInt32x16(v)
- case OpPopCountMaskedInt32x4:
- return rewriteValueAMD64_OpPopCountMaskedInt32x4(v)
- case OpPopCountMaskedInt32x8:
- return rewriteValueAMD64_OpPopCountMaskedInt32x8(v)
- case OpPopCountMaskedInt64x2:
- return rewriteValueAMD64_OpPopCountMaskedInt64x2(v)
- case OpPopCountMaskedInt64x4:
- return rewriteValueAMD64_OpPopCountMaskedInt64x4(v)
- case OpPopCountMaskedInt64x8:
- return rewriteValueAMD64_OpPopCountMaskedInt64x8(v)
- case OpPopCountMaskedInt8x16:
- return rewriteValueAMD64_OpPopCountMaskedInt8x16(v)
- case OpPopCountMaskedInt8x32:
- return rewriteValueAMD64_OpPopCountMaskedInt8x32(v)
- case OpPopCountMaskedInt8x64:
- return rewriteValueAMD64_OpPopCountMaskedInt8x64(v)
- case OpPopCountMaskedUint16x16:
- return rewriteValueAMD64_OpPopCountMaskedUint16x16(v)
- case OpPopCountMaskedUint16x32:
- return rewriteValueAMD64_OpPopCountMaskedUint16x32(v)
- case OpPopCountMaskedUint16x8:
- return rewriteValueAMD64_OpPopCountMaskedUint16x8(v)
- case OpPopCountMaskedUint32x16:
- return rewriteValueAMD64_OpPopCountMaskedUint32x16(v)
- case OpPopCountMaskedUint32x4:
- return rewriteValueAMD64_OpPopCountMaskedUint32x4(v)
- case OpPopCountMaskedUint32x8:
- return rewriteValueAMD64_OpPopCountMaskedUint32x8(v)
- case OpPopCountMaskedUint64x2:
- return rewriteValueAMD64_OpPopCountMaskedUint64x2(v)
- case OpPopCountMaskedUint64x4:
- return rewriteValueAMD64_OpPopCountMaskedUint64x4(v)
- case OpPopCountMaskedUint64x8:
- return rewriteValueAMD64_OpPopCountMaskedUint64x8(v)
- case OpPopCountMaskedUint8x16:
- return rewriteValueAMD64_OpPopCountMaskedUint8x16(v)
- case OpPopCountMaskedUint8x32:
- return rewriteValueAMD64_OpPopCountMaskedUint8x32(v)
- case OpPopCountMaskedUint8x64:
- return rewriteValueAMD64_OpPopCountMaskedUint8x64(v)
- case OpPopCountUint16x16:
- v.Op = OpAMD64VPOPCNTW256
+ case OpPrefetchCache:
+ v.Op = OpAMD64PrefetchT0
return true
- case OpPopCountUint16x32:
- v.Op = OpAMD64VPOPCNTW512
+ case OpPrefetchCacheStreamed:
+ v.Op = OpAMD64PrefetchNTA
return true
- case OpPopCountUint16x8:
- v.Op = OpAMD64VPOPCNTW128
+ case OpReciprocalFloat32x16:
+ v.Op = OpAMD64VRCP14PS512
return true
- case OpPopCountUint32x16:
- v.Op = OpAMD64VPOPCNTD512
+ case OpReciprocalFloat32x4:
+ v.Op = OpAMD64VRCPPS128
return true
- case OpPopCountUint32x4:
- v.Op = OpAMD64VPOPCNTD128
+ case OpReciprocalFloat32x8:
+ v.Op = OpAMD64VRCPPS256
return true
- case OpPopCountUint32x8:
- v.Op = OpAMD64VPOPCNTD256
+ case OpReciprocalFloat64x2:
+ v.Op = OpAMD64VRCP14PD128
return true
- case OpPopCountUint64x2:
- v.Op = OpAMD64VPOPCNTQ128
+ case OpReciprocalFloat64x4:
+ v.Op = OpAMD64VRCP14PD256
return true
- case OpPopCountUint64x4:
- v.Op = OpAMD64VPOPCNTQ256
+ case OpReciprocalFloat64x8:
+ v.Op = OpAMD64VRCP14PD512
return true
- case OpPopCountUint64x8:
- v.Op = OpAMD64VPOPCNTQ512
+ case OpReciprocalMaskedFloat32x16:
+ return rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v)
+ case OpReciprocalMaskedFloat32x4:
+ return rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v)
+ case OpReciprocalMaskedFloat32x8:
+ return rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v)
+ case OpReciprocalMaskedFloat64x2:
+ return rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v)
+ case OpReciprocalMaskedFloat64x4:
+ return rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v)
+ case OpReciprocalMaskedFloat64x8:
+ return rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v)
+ case OpReciprocalSqrtFloat32x16:
+ v.Op = OpAMD64VRSQRT14PS512
return true
- case OpPopCountUint8x16:
- v.Op = OpAMD64VPOPCNTB128
+ case OpReciprocalSqrtFloat32x4:
+ v.Op = OpAMD64VRSQRTPS128
return true
- case OpPopCountUint8x32:
- v.Op = OpAMD64VPOPCNTB256
+ case OpReciprocalSqrtFloat32x8:
+ v.Op = OpAMD64VRSQRTPS256
return true
- case OpPopCountUint8x64:
- v.Op = OpAMD64VPOPCNTB512
+ case OpReciprocalSqrtFloat64x2:
+ v.Op = OpAMD64VRSQRT14PD128
return true
- case OpPrefetchCache:
- v.Op = OpAMD64PrefetchT0
+ case OpReciprocalSqrtFloat64x4:
+ v.Op = OpAMD64VRSQRT14PD256
return true
- case OpPrefetchCacheStreamed:
- v.Op = OpAMD64PrefetchNTA
+ case OpReciprocalSqrtFloat64x8:
+ v.Op = OpAMD64VRSQRT14PD512
return true
+ case OpReciprocalSqrtMaskedFloat32x16:
+ return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v)
+ case OpReciprocalSqrtMaskedFloat32x4:
+ return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v)
+ case OpReciprocalSqrtMaskedFloat32x8:
+ return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v)
+ case OpReciprocalSqrtMaskedFloat64x2:
+ return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v)
+ case OpReciprocalSqrtMaskedFloat64x4:
+ return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v)
+ case OpReciprocalSqrtMaskedFloat64x8:
+ return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v)
case OpRotateAllLeftInt32x16:
v.Op = OpAMD64VPROLD512
return true
case OpRound64F:
v.Op = OpAMD64LoweredRound64F
return true
- case OpRoundFloat32x4:
- return rewriteValueAMD64_OpRoundFloat32x4(v)
- case OpRoundFloat32x8:
- return rewriteValueAMD64_OpRoundFloat32x8(v)
- case OpRoundFloat64x2:
- return rewriteValueAMD64_OpRoundFloat64x2(v)
- case OpRoundFloat64x4:
- return rewriteValueAMD64_OpRoundFloat64x4(v)
- case OpRoundScaledFloat32x16:
- return rewriteValueAMD64_OpRoundScaledFloat32x16(v)
- case OpRoundScaledFloat32x4:
- return rewriteValueAMD64_OpRoundScaledFloat32x4(v)
- case OpRoundScaledFloat32x8:
- return rewriteValueAMD64_OpRoundScaledFloat32x8(v)
- case OpRoundScaledFloat64x2:
- return rewriteValueAMD64_OpRoundScaledFloat64x2(v)
- case OpRoundScaledFloat64x4:
- return rewriteValueAMD64_OpRoundScaledFloat64x4(v)
- case OpRoundScaledFloat64x8:
- return rewriteValueAMD64_OpRoundScaledFloat64x8(v)
- case OpRoundScaledMaskedFloat32x16:
- return rewriteValueAMD64_OpRoundScaledMaskedFloat32x16(v)
- case OpRoundScaledMaskedFloat32x4:
- return rewriteValueAMD64_OpRoundScaledMaskedFloat32x4(v)
- case OpRoundScaledMaskedFloat32x8:
- return rewriteValueAMD64_OpRoundScaledMaskedFloat32x8(v)
- case OpRoundScaledMaskedFloat64x2:
- return rewriteValueAMD64_OpRoundScaledMaskedFloat64x2(v)
- case OpRoundScaledMaskedFloat64x4:
- return rewriteValueAMD64_OpRoundScaledMaskedFloat64x4(v)
- case OpRoundScaledMaskedFloat64x8:
- return rewriteValueAMD64_OpRoundScaledMaskedFloat64x8(v)
- case OpRoundScaledResidueFloat32x16:
- return rewriteValueAMD64_OpRoundScaledResidueFloat32x16(v)
- case OpRoundScaledResidueFloat32x4:
- return rewriteValueAMD64_OpRoundScaledResidueFloat32x4(v)
- case OpRoundScaledResidueFloat32x8:
- return rewriteValueAMD64_OpRoundScaledResidueFloat32x8(v)
- case OpRoundScaledResidueFloat64x2:
- return rewriteValueAMD64_OpRoundScaledResidueFloat64x2(v)
- case OpRoundScaledResidueFloat64x4:
- return rewriteValueAMD64_OpRoundScaledResidueFloat64x4(v)
- case OpRoundScaledResidueFloat64x8:
- return rewriteValueAMD64_OpRoundScaledResidueFloat64x8(v)
- case OpRoundScaledResidueMaskedFloat32x16:
- return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x16(v)
- case OpRoundScaledResidueMaskedFloat32x4:
- return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x4(v)
- case OpRoundScaledResidueMaskedFloat32x8:
- return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x8(v)
- case OpRoundScaledResidueMaskedFloat64x2:
- return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x2(v)
- case OpRoundScaledResidueMaskedFloat64x4:
- return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x4(v)
- case OpRoundScaledResidueMaskedFloat64x8:
- return rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x8(v)
case OpRoundToEven:
return rewriteValueAMD64_OpRoundToEven(v)
+ case OpRoundToEvenFloat32x4:
+ return rewriteValueAMD64_OpRoundToEvenFloat32x4(v)
+ case OpRoundToEvenFloat32x8:
+ return rewriteValueAMD64_OpRoundToEvenFloat32x8(v)
+ case OpRoundToEvenFloat64x2:
+ return rewriteValueAMD64_OpRoundToEvenFloat64x2(v)
+ case OpRoundToEvenFloat64x4:
+ return rewriteValueAMD64_OpRoundToEvenFloat64x4(v)
+ case OpRoundToEvenScaledFloat32x16:
+ return rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v)
+ case OpRoundToEvenScaledFloat32x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v)
+ case OpRoundToEvenScaledFloat32x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v)
+ case OpRoundToEvenScaledFloat64x2:
+ return rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v)
+ case OpRoundToEvenScaledFloat64x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v)
+ case OpRoundToEvenScaledFloat64x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v)
+ case OpRoundToEvenScaledMaskedFloat32x16:
+ return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v)
+ case OpRoundToEvenScaledMaskedFloat32x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v)
+ case OpRoundToEvenScaledMaskedFloat32x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v)
+ case OpRoundToEvenScaledMaskedFloat64x2:
+ return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v)
+ case OpRoundToEvenScaledMaskedFloat64x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v)
+ case OpRoundToEvenScaledMaskedFloat64x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v)
+ case OpRoundToEvenScaledResidueFloat32x16:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v)
+ case OpRoundToEvenScaledResidueFloat32x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v)
+ case OpRoundToEvenScaledResidueFloat32x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v)
+ case OpRoundToEvenScaledResidueFloat64x2:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v)
+ case OpRoundToEvenScaledResidueFloat64x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v)
+ case OpRoundToEvenScaledResidueFloat64x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v)
+ case OpRoundToEvenScaledResidueMaskedFloat32x16:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v)
+ case OpRoundToEvenScaledResidueMaskedFloat32x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v)
+ case OpRoundToEvenScaledResidueMaskedFloat32x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v)
+ case OpRoundToEvenScaledResidueMaskedFloat64x2:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v)
+ case OpRoundToEvenScaledResidueMaskedFloat64x4:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v)
+ case OpRoundToEvenScaledResidueMaskedFloat64x8:
+ return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v)
case OpRsh16Ux16:
return rewriteValueAMD64_OpRsh16Ux16(v)
case OpRsh16Ux32:
return rewriteValueAMD64_OpRsh8x64(v)
case OpRsh8x8:
return rewriteValueAMD64_OpRsh8x8(v)
- case OpSaturatedAddDotProdInt32x16:
- v.Op = OpAMD64VPDPWSSDS512
- return true
- case OpSaturatedAddDotProdInt32x4:
- v.Op = OpAMD64VPDPWSSDS128
- return true
- case OpSaturatedAddDotProdInt32x8:
- v.Op = OpAMD64VPDPWSSDS256
- return true
- case OpSaturatedAddDotProdMaskedInt32x16:
- return rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x16(v)
- case OpSaturatedAddDotProdMaskedInt32x4:
- return rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x4(v)
- case OpSaturatedAddDotProdMaskedInt32x8:
- return rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x8(v)
- case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16:
- return rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16(v)
- case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32:
- return rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32(v)
- case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64:
- return rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64(v)
- case OpSaturatedUnsignedSignedPairDotProdUint8x16:
- v.Op = OpAMD64VPMADDUBSW128
- return true
- case OpSaturatedUnsignedSignedPairDotProdUint8x32:
- v.Op = OpAMD64VPMADDUBSW256
- return true
- case OpSaturatedUnsignedSignedPairDotProdUint8x64:
- v.Op = OpAMD64VPMADDUBSW512
- return true
- case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16:
- v.Op = OpAMD64VPDPBUSDS512
- return true
- case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4:
- v.Op = OpAMD64VPDPBUSDS128
- return true
- case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8:
- v.Op = OpAMD64VPDPBUSDS256
- return true
- case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16:
- return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v)
- case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4:
- return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
- case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
- return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
case OpScaleFloat32x16:
v.Op = OpAMD64VSCALEFPS512
return true
case OpSignExt8to64:
v.Op = OpAMD64MOVBQSX
return true
- case OpSignInt16x16:
- v.Op = OpAMD64VPSIGNW256
- return true
- case OpSignInt16x8:
- v.Op = OpAMD64VPSIGNW128
- return true
- case OpSignInt32x4:
- v.Op = OpAMD64VPSIGND128
- return true
- case OpSignInt32x8:
- v.Op = OpAMD64VPSIGND256
- return true
- case OpSignInt8x16:
- v.Op = OpAMD64VPSIGNB128
- return true
- case OpSignInt8x32:
- v.Op = OpAMD64VPSIGNB256
- return true
case OpSlicemask:
return rewriteValueAMD64_OpSlicemask(v)
case OpSpectreIndex:
case OpSubSaturatedMaskedUint8x64:
return rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v)
case OpSubSaturatedUint16x16:
- v.Op = OpAMD64VPSUBSW256
+ v.Op = OpAMD64VPSUBUSW256
return true
case OpSubSaturatedUint16x32:
- v.Op = OpAMD64VPSUBSW512
+ v.Op = OpAMD64VPSUBUSW512
return true
case OpSubSaturatedUint16x8:
- v.Op = OpAMD64VPSUBSW128
+ v.Op = OpAMD64VPSUBUSW128
return true
case OpSubSaturatedUint8x16:
- v.Op = OpAMD64VPSUBSB128
+ v.Op = OpAMD64VPSUBUSB128
return true
case OpSubSaturatedUint8x32:
- v.Op = OpAMD64VPSUBSB256
+ v.Op = OpAMD64VPSUBUSB256
return true
case OpSubSaturatedUint8x64:
- v.Op = OpAMD64VPSUBSB512
+ v.Op = OpAMD64VPSUBUSB512
return true
case OpSubUint16x16:
v.Op = OpAMD64VPSUBW256
return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v)
case OpTruncScaledResidueMaskedFloat64x8:
return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v)
- case OpUnsignedSignedQuadDotProdAccumulateInt32x16:
- v.Op = OpAMD64VPDPBUSD512
- return true
- case OpUnsignedSignedQuadDotProdAccumulateInt32x4:
- v.Op = OpAMD64VPDPBUSD128
- return true
- case OpUnsignedSignedQuadDotProdAccumulateInt32x8:
- v.Op = OpAMD64VPDPBUSD256
- return true
- case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16:
- return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v)
- case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4:
- return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v)
- case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8:
- return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v)
case OpWB:
v.Op = OpAMD64LoweredWB
return true
}
return false
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt16x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt16x16 x mask)
+ // match: (AbsMaskedInt16x16 x mask)
// result: (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt16x32(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt16x32 x mask)
+ // match: (AbsMaskedInt16x32 x mask)
// result: (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt16x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt16x8 x mask)
+ // match: (AbsMaskedInt16x8 x mask)
// result: (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt32x16 x mask)
+ // match: (AbsMaskedInt32x16 x mask)
// result: (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt32x4 x mask)
+ // match: (AbsMaskedInt32x4 x mask)
// result: (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt32x8 x mask)
+ // match: (AbsMaskedInt32x8 x mask)
// result: (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt64x2 x mask)
+ // match: (AbsMaskedInt64x2 x mask)
// result: (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt64x4 x mask)
+ // match: (AbsMaskedInt64x4 x mask)
// result: (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt64x8 x mask)
+ // match: (AbsMaskedInt64x8 x mask)
// result: (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt8x16(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt8x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt8x16 x mask)
+ // match: (AbsMaskedInt8x16 x mask)
// result: (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt8x32(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt8x32 x mask)
+ // match: (AbsMaskedInt8x32 x mask)
// result: (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAbsoluteMaskedInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpAbsMaskedInt8x64(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AbsoluteMaskedInt8x64 x mask)
+ // match: (AbsMaskedInt8x64 x mask)
// result: (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpAddDotProdMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddDotProdPairsSaturatedMaskedInt32x16 x y z mask)
+ // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddDotProdPairsSaturatedMaskedInt32x4 x y z mask)
+ // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddDotProdPairsSaturatedMaskedInt32x8 x y z mask)
+ // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPWSSDSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddDotProdQuadrupleMaskedInt32x16 x y z mask)
+ // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddDotProdQuadrupleMaskedInt32x4 x y z mask)
+ // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (AddDotProdQuadrupleMaskedInt32x8 x y z mask)
+ // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VPDPBUSDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AddDotProdMaskedInt32x16 x y z mask)
- // result: (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ // match: (AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask)
+ // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
- v.reset(OpAMD64VPDPWSSDMasked512)
+ v.reset(OpAMD64VPDPBUSDSMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpAddDotProdMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AddDotProdMaskedInt32x4 x y z mask)
- // result: (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ // match: (AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask)
+ // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
- v.reset(OpAMD64VPDPWSSDMasked128)
+ v.reset(OpAMD64VPDPBUSDSMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpAddDotProdMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (AddDotProdMaskedInt32x8 x y z mask)
- // result: (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ // match: (AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask)
+ // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
z := v_2
mask := v_3
- v.reset(OpAMD64VPDPWSSDMasked256)
+ v.reset(OpAMD64VPDPBUSDSMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg4(x, y, z, v0)
v_0 := v.Args[0]
b := v.Block
// match: (AddSaturatedMaskedUint16x16 x y mask)
- // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // result: (VPADDUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPADDSWMasked256)
+ v.reset(OpAMD64VPADDUSWMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (AddSaturatedMaskedUint16x32 x y mask)
- // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // result: (VPADDUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPADDSWMasked512)
+ v.reset(OpAMD64VPADDUSWMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (AddSaturatedMaskedUint16x8 x y mask)
- // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // result: (VPADDUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPADDSWMasked128)
+ v.reset(OpAMD64VPADDUSWMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (AddSaturatedMaskedUint8x16 x y mask)
- // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ // result: (VPADDUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPADDSBMasked128)
+ v.reset(OpAMD64VPADDUSBMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (AddSaturatedMaskedUint8x32 x y mask)
- // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ // result: (VPADDUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPADDSBMasked256)
+ v.reset(OpAMD64VPADDUSBMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (AddSaturatedMaskedUint8x64 x y mask)
- // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ // result: (VPADDUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPADDSBMasked512)
+ v.reset(OpAMD64VPADDUSBMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalMaskedFloat32x16 x mask)
- // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRCP14PSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalMaskedFloat32x4 x mask)
- // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRCP14PSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalMaskedFloat32x8 x mask)
- // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRCP14PSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalMaskedFloat64x2 x mask)
- // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRCP14PDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalMaskedFloat64x4 x mask)
- // result: (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRCP14PDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalMaskedFloat64x8 x mask)
- // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRCP14PDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalOfSqrtMaskedFloat32x16 x mask)
- // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRSQRT14PSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalOfSqrtMaskedFloat32x4 x mask)
- // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRSQRT14PSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalOfSqrtMaskedFloat32x8 x mask)
- // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRSQRT14PSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalOfSqrtMaskedFloat64x2 x mask)
- // result: (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRSQRT14PDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalOfSqrtMaskedFloat64x4 x mask)
- // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRSQRT14PDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (ApproximateReciprocalOfSqrtMaskedFloat64x8 x mask)
- // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VRSQRT14PDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
-func rewriteValueAMD64_OpDotProdBroadcastFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v *Value) bool {
+ v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (DotProdBroadcastFloat32x4 x y)
- // result: (VDPPS128 [127] x y)
+ b := v.Block
+ // match: (DotProdPairsMaskedInt16x16 x y mask)
+ // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- v.reset(OpAMD64VDPPS128)
- v.AuxInt = int8ToAuxInt(127)
- v.AddArg2(x, y)
+ mask := v_2
+ v.reset(OpAMD64VPMADDWDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpDotProdBroadcastFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v *Value) bool {
+ v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (DotProdBroadcastFloat32x8 x y)
- // result: (VDPPS256 [127] x y)
+ b := v.Block
+ // match: (DotProdPairsMaskedInt16x32 x y mask)
+ // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- v.reset(OpAMD64VDPPS256)
- v.AuxInt = int8ToAuxInt(127)
- v.AddArg2(x, y)
+ mask := v_2
+ v.reset(OpAMD64VPMADDWDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v *Value) bool {
+ v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (DotProdBroadcastFloat64x2 x y)
- // result: (VDPPD128 [127] x y)
+ b := v.Block
+ // match: (DotProdPairsMaskedInt16x8 x y mask)
+ // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- v.reset(OpAMD64VDPPD128)
- v.AuxInt = int8ToAuxInt(127)
- v.AddArg2(x, y)
+ mask := v_2
+ v.reset(OpAMD64VPMADDWDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (DotProdPairsSaturatedMaskedUint8x16 x y mask)
+ // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMADDUBSWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (DotProdPairsSaturatedMaskedUint8x32 x y mask)
+ // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMADDUBSWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (DotProdPairsSaturatedMaskedUint8x64 x y mask)
+ // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMADDUBSWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
return true
}
}
return true
}
}
-func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x16(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddMaskedFloat32x16 x y z mask)
- // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADD213PSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddMaskedFloat32x4 x y z mask)
- // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADD213PSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddMaskedFloat32x8 x y z mask)
- // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADD213PSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x2(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddMaskedFloat64x2 x y z mask)
- // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADD213PDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddMaskedFloat64x4 x y z mask)
- // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADD213PDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddMaskedFloat64x8 x y z mask)
- // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADD213PDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x16(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddSubMaskedFloat32x16 x y z mask)
- // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADDSUB213PSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddSubMaskedFloat32x4 x y z mask)
- // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADDSUB213PSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddSubMaskedFloat32x8 x y z mask)
- // result: (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADDSUB213PSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x2(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddSubMaskedFloat64x2 x y z mask)
- // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADDSUB213PDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddSubMaskedFloat64x4 x y z mask)
- // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADDSUB213PDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplyAddSubMaskedFloat64x8 x y z mask)
- // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMADDSUB213PDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x16(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplySubAddMaskedFloat32x16 x y z mask)
- // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMSUBADD213PSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplySubAddMaskedFloat32x4 x y z mask)
- // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMSUBADD213PSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplySubAddMaskedFloat32x8 x y z mask)
- // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMSUBADD213PSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x2(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplySubAddMaskedFloat64x2 x y z mask)
- // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMSUBADD213PDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplySubAddMaskedFloat64x4 x y z mask)
- // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMSUBADD213PDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (FusedMultiplySubAddMaskedFloat64x8 x y z mask)
- // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VFMSUBADD213PDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
}
return false
}
-func rewriteValueAMD64_OpMulEvenWidenMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpMulAddMaskedFloat32x16(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulEvenWidenMaskedInt64x2 x y mask)
- // result: (VPMULDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+ // match: (MulAddMaskedFloat32x16 x y z mask)
+ // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULDQMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADD213PSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulEvenWidenMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpMulAddMaskedFloat32x4(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulEvenWidenMaskedInt64x4 x y mask)
- // result: (VPMULDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ // match: (MulAddMaskedFloat32x4 x y z mask)
+ // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULDQMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADD213PSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulEvenWidenMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpMulAddMaskedFloat32x8(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulEvenWidenMaskedInt64x8 x y mask)
- // result: (VPMULDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ // match: (MulAddMaskedFloat32x8 x y z mask)
+ // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULDQMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADD213PSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulEvenWidenMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpMulAddMaskedFloat64x2(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulEvenWidenMaskedUint64x2 x y mask)
- // result: (VPMULUDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+ // match: (MulAddMaskedFloat64x2 x y z mask)
+ // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULUDQMasked128)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADD213PDMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulEvenWidenMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpMulAddMaskedFloat64x4(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulEvenWidenMaskedUint64x4 x y mask)
- // result: (VPMULUDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ // match: (MulAddMaskedFloat64x4 x y z mask)
+ // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULUDQMasked256)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADD213PDMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulEvenWidenMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpMulAddMaskedFloat64x8(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulEvenWidenMaskedUint64x8 x y mask)
- // result: (VPMULUDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ // match: (MulAddMaskedFloat64x8 x y z mask)
+ // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULUDQMasked512)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADD213PDMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulHighMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulHighMaskedInt16x16 x y mask)
- // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // match: (MulAddSubMaskedFloat32x16 x y z mask)
+ // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULHWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADDSUB213PSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulHighMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulHighMaskedInt16x32 x y mask)
- // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // match: (MulAddSubMaskedFloat32x4 x y z mask)
+ // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULHWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADDSUB213PSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulHighMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulHighMaskedInt16x8 x y mask)
- // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // match: (MulAddSubMaskedFloat32x8 x y z mask)
+ // result: (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
- mask := v_2
- v.reset(OpAMD64VPMULHWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADDSUB213PSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
- v.AddArg3(x, y, v0)
+ v.AddArg4(x, y, z, v0)
return true
}
}
-func rewriteValueAMD64_OpMulHighMaskedUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v *Value) bool {
+ v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulHighMaskedUint16x16 x y mask)
- // result: (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // match: (MulAddSubMaskedFloat64x2 x y z mask)
+ // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADDSUB213PDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulAddSubMaskedFloat64x4 x y z mask)
+ // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADDSUB213PDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulAddSubMaskedFloat64x8 x y z mask)
+ // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMADDSUB213PDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulHighMaskedInt16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulHighMaskedInt16x16 x y mask)
+ // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPMULHUWMasked256)
+ v.reset(OpAMD64VPMULHWMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpMulHighMaskedUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpMulHighMaskedInt16x32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulHighMaskedUint16x32 x y mask)
+ // match: (MulHighMaskedInt16x32 x y mask)
- // result: (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpMulHighMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpMulHighMaskedInt16x8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (MulHighMaskedUint16x8 x y mask)
+ // match: (MulHighMaskedInt16x8 x y mask)
- // result: (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
return true
}
}
+func rewriteValueAMD64_OpMulMaskedUint16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint16x16 x y mask)
+ // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint16x32 x y mask)
+ // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint16x8 x y mask)
+ // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint32x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint32x16 x y mask)
+ // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint32x4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint32x4 x y mask)
+ // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint32x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint32x8 x y mask)
+ // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint64x2(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint64x2 x y mask)
+ // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint64x4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint64x4 x y mask)
+ // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulMaskedUint64x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulMaskedUint64x8 x y mask)
+ // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ mask := v_2
+ v.reset(OpAMD64VPMULLQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(x, y, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulSubAddMaskedFloat32x16 x y z mask)
+ // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMSUBADD213PSMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulSubAddMaskedFloat32x4 x y z mask)
+ // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMSUBADD213PSMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulSubAddMaskedFloat32x8 x y z mask)
+ // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMSUBADD213PSMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulSubAddMaskedFloat64x2 x y z mask)
+ // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMSUBADD213PDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulSubAddMaskedFloat64x4 x y z mask)
+ // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMSUBADD213PDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (MulSubAddMaskedFloat64x8 x y z mask)
+ // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ y := v_1
+ z := v_2
+ mask := v_3
+ v.reset(OpAMD64VFMSUBADD213PDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg4(x, y, z, v0)
+ return true
+ }
+}
func rewriteValueAMD64_OpNeg32F(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
return true
}
}
+func rewriteValueAMD64_OpOnesCountMaskedInt16x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt16x16 x mask)
+ // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt16x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt16x32 x mask)
+ // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt16x8 x mask)
+ // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt32x16 x mask)
+ // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt32x4 x mask)
+ // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt32x8 x mask)
+ // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt64x2 x mask)
+ // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt64x4 x mask)
+ // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt64x8 x mask)
+ // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt8x16 x mask)
+ // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt8x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt8x32 x mask)
+ // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedInt8x64(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedInt8x64 x mask)
+ // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint16x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint16x16 x mask)
+ // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTWMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint16x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint16x32 x mask)
+ // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTWMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint16x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint16x8 x mask)
+ // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTWMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint32x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint32x16 x mask)
+ // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTDMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint32x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint32x4 x mask)
+ // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTDMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint32x8 x mask)
+ // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTDMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint64x2(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint64x2 x mask)
+ // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTQMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint64x4 x mask)
+ // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTQMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint64x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint64x8 x mask)
+ // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTQMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint8x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint8x16 x mask)
+ // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTBMasked128)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint8x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint8x32 x mask)
+ // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTBMasked256)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpOnesCountMaskedUint8x64(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (OnesCountMaskedUint8x64 x mask)
+ // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+ for {
+ x := v_0
+ mask := v_1
+ v.reset(OpAMD64VPOPCNTBMasked512)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg2(x, v0)
+ return true
+ }
+}
func rewriteValueAMD64_OpOrMaskedInt32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
return true
}
}
-func rewriteValueAMD64_OpPairDotProdMaskedInt16x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PairDotProdMaskedInt16x16 x y mask)
- // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPMADDWDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPairDotProdMaskedInt16x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PairDotProdMaskedInt16x32 x y mask)
- // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPMADDWDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPairDotProdMaskedInt16x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PairDotProdMaskedInt16x8 x y mask)
- // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPMADDWDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
func rewriteValueAMD64_OpPermute2MaskedFloat32x16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedInt16x16 x mask)
- // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedInt16x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedInt16x32 x mask)
- // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedInt16x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedInt16x8 x mask)
- // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedInt32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedInt32x16 x mask)
- // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ // match: (ReciprocalMaskedFloat32x16 x mask)
+ // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTDMasked512)
+ v.reset(OpAMD64VRCP14PSMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedInt32x4 x mask)
- // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ // match: (ReciprocalMaskedFloat32x4 x mask)
+ // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTDMasked128)
+ v.reset(OpAMD64VRCP14PSMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedInt32x8 x mask)
- // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ // match: (ReciprocalMaskedFloat32x8 x mask)
+ // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTDMasked256)
+ v.reset(OpAMD64VRCP14PSMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedInt64x2 x mask)
- // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ // match: (ReciprocalMaskedFloat64x2 x mask)
+ // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTQMasked128)
+ v.reset(OpAMD64VRCP14PDMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedInt64x4 x mask)
- // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ // match: (ReciprocalMaskedFloat64x4 x mask)
+ // result: (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTQMasked256)
+ v.reset(OpAMD64VRCP14PDMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedInt64x8 x mask)
- // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ // match: (ReciprocalMaskedFloat64x8 x mask)
+ // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTQMasked512)
+ v.reset(OpAMD64VRCP14PDMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedInt8x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedInt8x16 x mask)
- // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTBMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedInt8x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedInt8x32 x mask)
- // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTBMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedInt8x64 x mask)
- // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTBMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedUint16x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedUint16x16 x mask)
- // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedUint16x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedUint16x32 x mask)
- // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedUint16x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedUint16x8 x mask)
- // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedUint32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedUint32x16 x mask)
- // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+ // match: (ReciprocalSqrtMaskedFloat32x16 x mask)
+ // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTDMasked512)
+ v.reset(OpAMD64VRSQRT14PSMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedUint32x4 x mask)
- // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ // match: (ReciprocalSqrtMaskedFloat32x4 x mask)
+ // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTDMasked128)
+ v.reset(OpAMD64VRSQRT14PSMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedUint32x8 x mask)
- // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ // match: (ReciprocalSqrtMaskedFloat32x8 x mask)
+ // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTDMasked256)
+ v.reset(OpAMD64VRSQRT14PSMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedUint64x2 x mask)
- // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ // match: (ReciprocalSqrtMaskedFloat64x2 x mask)
+ // result: (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTQMasked128)
+ v.reset(OpAMD64VRSQRT14PDMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedUint64x4 x mask)
- // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ // match: (ReciprocalSqrtMaskedFloat64x4 x mask)
+ // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTQMasked256)
+ v.reset(OpAMD64VRSQRT14PDMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (PopCountMaskedUint64x8 x mask)
- // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+ // match: (ReciprocalSqrtMaskedFloat64x8 x mask)
+ // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
x := v_0
mask := v_1
- v.reset(OpAMD64VPOPCNTQMasked512)
+ v.reset(OpAMD64VRSQRT14PDMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg2(x, v0)
return true
}
}
-func rewriteValueAMD64_OpPopCountMaskedUint8x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedUint8x16 x mask)
- // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTBMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedUint8x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedUint8x32 x mask)
- // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTBMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpPopCountMaskedUint8x64(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (PopCountMaskedUint8x64 x mask)
- // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
- for {
- x := v_0
- mask := v_1
- v.reset(OpAMD64VPOPCNTBMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg2(x, v0)
- return true
- }
-}
func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return true
}
}
-func rewriteValueAMD64_OpRoundFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RoundToEven x)
+ // result: (ROUNDSD [0] x)
+ for {
+ x := v_0
+ v.reset(OpAMD64ROUNDSD)
+ v.AuxInt = int8ToAuxInt(0)
+ v.AddArg(x)
+ return true
+ }
+}
+func rewriteValueAMD64_OpRoundToEvenFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundFloat32x4 x)
+ // match: (RoundToEvenFloat32x4 x)
// result: (VROUNDPS128 [0] x)
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpRoundFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundFloat32x8 x)
+ // match: (RoundToEvenFloat32x8 x)
// result: (VROUNDPS256 [0] x)
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpRoundFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundFloat64x2 x)
+ // match: (RoundToEvenFloat64x2 x)
// result: (VROUNDPD128 [0] x)
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpRoundFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundFloat64x4 x)
+ // match: (RoundToEvenFloat64x4 x)
// result: (VROUNDPD256 [0] x)
for {
x := v_0
return true
}
}
-func rewriteValueAMD64_OpRoundScaledFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledFloat32x16 [a] x)
+ // match: (RoundToEvenScaledFloat32x16 [a] x)
// result: (VRNDSCALEPS512 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledFloat32x4 [a] x)
+ // match: (RoundToEvenScaledFloat32x4 [a] x)
// result: (VRNDSCALEPS128 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledFloat32x8 [a] x)
+ // match: (RoundToEvenScaledFloat32x8 [a] x)
// result: (VRNDSCALEPS256 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledFloat64x2 [a] x)
+ // match: (RoundToEvenScaledFloat64x2 [a] x)
// result: (VRNDSCALEPD128 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledFloat64x4 [a] x)
+ // match: (RoundToEvenScaledFloat64x4 [a] x)
// result: (VRNDSCALEPD256 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledFloat64x8 [a] x)
+ // match: (RoundToEvenScaledFloat64x8 [a] x)
// result: (VRNDSCALEPD512 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledMaskedFloat32x16 [a] x mask)
+ // match: (RoundToEvenScaledMaskedFloat32x16 [a] x mask)
// result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledMaskedFloat32x4 [a] x mask)
+ // match: (RoundToEvenScaledMaskedFloat32x4 [a] x mask)
// result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledMaskedFloat32x8 [a] x mask)
+ // match: (RoundToEvenScaledMaskedFloat32x8 [a] x mask)
// result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledMaskedFloat64x2 [a] x mask)
+ // match: (RoundToEvenScaledMaskedFloat64x2 [a] x mask)
// result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledMaskedFloat64x4 [a] x mask)
+ // match: (RoundToEvenScaledMaskedFloat64x4 [a] x mask)
// result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledMaskedFloat64x8 [a] x mask)
+ // match: (RoundToEvenScaledMaskedFloat64x8 [a] x mask)
// result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledResidueFloat32x16 [a] x)
+ // match: (RoundToEvenScaledResidueFloat32x16 [a] x)
// result: (VREDUCEPS512 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledResidueFloat32x4 [a] x)
+ // match: (RoundToEvenScaledResidueFloat32x4 [a] x)
// result: (VREDUCEPS128 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledResidueFloat32x8 [a] x)
+ // match: (RoundToEvenScaledResidueFloat32x8 [a] x)
// result: (VREDUCEPS256 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledResidueFloat64x2 [a] x)
+ // match: (RoundToEvenScaledResidueFloat64x2 [a] x)
// result: (VREDUCEPD128 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledResidueFloat64x4 [a] x)
+ // match: (RoundToEvenScaledResidueFloat64x4 [a] x)
// result: (VREDUCEPD256 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (RoundScaledResidueFloat64x8 [a] x)
+ // match: (RoundToEvenScaledResidueFloat64x8 [a] x)
// result: (VREDUCEPD512 [a+0] x)
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledResidueMaskedFloat32x16 [a] x mask)
+ // match: (RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask)
// result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledResidueMaskedFloat32x4 [a] x mask)
+ // match: (RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask)
// result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledResidueMaskedFloat32x8 [a] x mask)
+ // match: (RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask)
// result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledResidueMaskedFloat64x2 [a] x mask)
+ // match: (RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask)
// result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledResidueMaskedFloat64x4 [a] x mask)
+ // match: (RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask)
// result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundScaledResidueMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
- // match: (RoundScaledResidueMaskedFloat64x8 [a] x mask)
+ // match: (RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask)
// result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
for {
a := auxIntToInt8(v.AuxInt)
return true
}
}
-func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
- v_0 := v.Args[0]
- // match: (RoundToEven x)
- // result: (ROUNDSD [0] x)
- for {
- x := v_0
- v.reset(OpAMD64ROUNDSD)
- v.AuxInt = int8ToAuxInt(0)
- v.AddArg(x)
- return true
- }
-}
func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
}
return false
}
-func rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x16(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddDotProdMaskedInt32x16 x y z mask)
- // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPWSSDSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddDotProdMaskedInt32x4 x y z mask)
- // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPWSSDSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedAddDotProdMaskedInt32x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedAddDotProdMaskedInt32x8 x y z mask)
- // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPWSSDSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x16 x y mask)
- // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPMADDUBSWMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x32 x y mask)
- // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPMADDUBSWMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x64 x y mask)
- // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- mask := v_2
- v.reset(OpAMD64VPMADDUBSWMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(x, y, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask)
- // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDSMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask)
- // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDSMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask)
- // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDSMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
func rewriteValueAMD64_OpScaleMaskedFloat32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
// match: (SubSaturatedMaskedUint16x16 x y mask)
- // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // result: (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSWMasked256)
+ v.reset(OpAMD64VPSUBUSWMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (SubSaturatedMaskedUint16x32 x y mask)
- // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+ // result: (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSWMasked512)
+ v.reset(OpAMD64VPSUBUSWMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (SubSaturatedMaskedUint16x8 x y mask)
- // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // result: (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSWMasked128)
+ v.reset(OpAMD64VPSUBUSWMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (SubSaturatedMaskedUint8x16 x y mask)
- // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+ // result: (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSBMasked128)
+ v.reset(OpAMD64VPSUBUSBMasked128)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (SubSaturatedMaskedUint8x32 x y mask)
- // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+ // result: (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSBMasked256)
+ v.reset(OpAMD64VPSUBUSBMasked256)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
v_0 := v.Args[0]
b := v.Block
// match: (SubSaturatedMaskedUint8x64 x y mask)
- // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+ // result: (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
for {
x := v_0
y := v_1
mask := v_2
- v.reset(OpAMD64VPSUBSBMasked512)
+ v.reset(OpAMD64VPSUBUSBMasked512)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(x, y, v0)
return true
}
}
-func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask)
- // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDMasked512)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask)
- // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDMasked128)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
-func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask)
- // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- x := v_0
- y := v_1
- z := v_2
- mask := v_3
- v.reset(OpAMD64VPDPBUSDMasked256)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg4(x, y, z, v0)
- return true
- }
-}
func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
const simdPackage = "simd"
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
- addF(simdPackage, "Int8x16.Absolute", opLen1(ssa.OpAbsoluteInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.Absolute", opLen1(ssa.OpAbsoluteInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.Absolute", opLen1(ssa.OpAbsoluteInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.Absolute", opLen1(ssa.OpAbsoluteInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.Absolute", opLen1(ssa.OpAbsoluteInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.Absolute", opLen1(ssa.OpAbsoluteInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.Absolute", opLen1(ssa.OpAbsoluteInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.Absolute", opLen1(ssa.OpAbsoluteInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.Absolute", opLen1(ssa.OpAbsoluteInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.Absolute", opLen1(ssa.OpAbsoluteInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.Absolute", opLen1(ssa.OpAbsoluteInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.Absolute", opLen1(ssa.OpAbsoluteInt64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.Abs", opLen1(ssa.OpAbsInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.Abs", opLen1(ssa.OpAbsInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.Abs", opLen1(ssa.OpAbsInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.Abs", opLen1(ssa.OpAbsInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Abs", opLen1(ssa.OpAbsInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.Abs", opLen1(ssa.OpAbsInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.Abs", opLen1(ssa.OpAbsInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.Abs", opLen1(ssa.OpAbsInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.Abs", opLen1(ssa.OpAbsInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.Abs", opLen1(ssa.OpAbsInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.Abs", opLen1(ssa.OpAbsInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Add", opLen2(ssa.OpAddUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.AddDotProd", opLen3(ssa.OpAddDotProdInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.AddDotProd", opLen3(ssa.OpAddDotProdInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.AddDotProd", opLen3(ssa.OpAddDotProdInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.AddDotProdMasked", opLen4(ssa.OpAddDotProdMaskedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.AddDotProdMasked", opLen4(ssa.OpAddDotProdMaskedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.AddDotProdMasked", opLen4(ssa.OpAddDotProdMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.CopySign", opLen2(ssa.OpCopySignInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x4.CopySign", opLen2(ssa.OpCopySignInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.CopySign", opLen2(ssa.OpCopySignInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64)
addF(simdPackage, "Int64x2.Mul", opLen2(ssa.OpMulInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Mul", opLen2(ssa.OpMulInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.Mul", opLen2(ssa.OpMulInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.Mul", opLen2(ssa.OpMulUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.Mul", opLen2(ssa.OpMulUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.Mul", opLen2(ssa.OpMulUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.Mul", opLen2(ssa.OpMulUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.Mul", opLen2(ssa.OpMulUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.Mul", opLen2(ssa.OpMulUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.Mul", opLen2(ssa.OpMulUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x4.Mul", opLen2(ssa.OpMulUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x8.Mul", opLen2(ssa.OpMulUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.MulAdd", opLen3(ssa.OpMulAddFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.MulAdd", opLen3(ssa.OpMulAddFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.MulAdd", opLen3(ssa.OpMulAddFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.MulAdd", opLen3(ssa.OpMulAddFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.MulAdd", opLen3(ssa.OpMulAddFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.MulAdd", opLen3(ssa.OpMulAddFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint64x2.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint64x4.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint64x8.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.MulHigh", opLen2(ssa.OpMulHighInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.MulHigh", opLen2(ssa.OpMulHighInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.MulMasked", opLen3(ssa.OpMulMaskedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.MulMasked", opLen3(ssa.OpMulMaskedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.MulMasked", opLen3(ssa.OpMulMaskedUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.MulMasked", opLen3(ssa.OpMulMaskedUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.MulMasked", opLen3(ssa.OpMulMaskedUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.MulMasked", opLen3(ssa.OpMulMaskedUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.MulMasked", opLen3(ssa.OpMulMaskedUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x4.MulMasked", opLen3(ssa.OpMulMaskedUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x8.MulMasked", opLen3(ssa.OpMulMaskedUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.OnesCount", opLen1(ssa.OpOnesCountInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.OnesCount", opLen1(ssa.OpOnesCountInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.OnesCount", opLen1(ssa.OpOnesCountInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.OnesCount", opLen1(ssa.OpOnesCountInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.OnesCount", opLen1(ssa.OpOnesCountInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.OnesCount", opLen1(ssa.OpOnesCountInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.OnesCount", opLen1(ssa.OpOnesCountInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.OnesCount", opLen1(ssa.OpOnesCountInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.OnesCount", opLen1(ssa.OpOnesCountInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.OnesCount", opLen1(ssa.OpOnesCountInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.OnesCount", opLen1(ssa.OpOnesCountInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.OnesCount", opLen1(ssa.OpOnesCountInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.OnesCount", opLen1(ssa.OpOnesCountUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.OnesCount", opLen1(ssa.OpOnesCountUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.OnesCount", opLen1(ssa.OpOnesCountUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.OnesCount", opLen1(ssa.OpOnesCountUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.OnesCount", opLen1(ssa.OpOnesCountUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.OnesCount", opLen1(ssa.OpOnesCountUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.OnesCount", opLen1(ssa.OpOnesCountUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.OnesCount", opLen1(ssa.OpOnesCountUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.OnesCount", opLen1(ssa.OpOnesCountUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.OnesCount", opLen1(ssa.OpOnesCountUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x4.OnesCount", opLen1(ssa.OpOnesCountUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x8.OnesCount", opLen1(ssa.OpOnesCountUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x32, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.PopCount", opLen1(ssa.OpPopCountInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.PopCount", opLen1(ssa.OpPopCountInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.PopCount", opLen1(ssa.OpPopCountInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.PopCount", opLen1(ssa.OpPopCountInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.PopCount", opLen1(ssa.OpPopCountInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.PopCount", opLen1(ssa.OpPopCountInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.PopCount", opLen1(ssa.OpPopCountInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.PopCount", opLen1(ssa.OpPopCountInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.PopCount", opLen1(ssa.OpPopCountInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.PopCount", opLen1(ssa.OpPopCountInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.PopCount", opLen1(ssa.OpPopCountInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.PopCount", opLen1(ssa.OpPopCountInt64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.PopCount", opLen1(ssa.OpPopCountUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.PopCount", opLen1(ssa.OpPopCountUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.PopCount", opLen1(ssa.OpPopCountUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.PopCount", opLen1(ssa.OpPopCountUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.PopCount", opLen1(ssa.OpPopCountUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.PopCount", opLen1(ssa.OpPopCountUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint32x4.PopCount", opLen1(ssa.OpPopCountUint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x8.PopCount", opLen1(ssa.OpPopCountUint32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint32x16.PopCount", opLen1(ssa.OpPopCountUint32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint64x2.PopCount", opLen1(ssa.OpPopCountUint64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint64x4.PopCount", opLen1(ssa.OpPopCountUint64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint64x8.PopCount", opLen1(ssa.OpPopCountUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x2.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int64x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint32x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint32x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint64x2.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint64x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint64x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.Reciprocal", opLen1(ssa.OpReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.Reciprocal", opLen1(ssa.OpReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.Reciprocal", opLen1(ssa.OpReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x8, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64)
addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.RoundScaled", opLen1Imm8(ssa.OpRoundScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.RoundScaledMasked", opLen2Imm8(ssa.OpRoundScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.RoundScaledResidue", opLen1Imm8(ssa.OpRoundScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float32x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float32x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float32x16.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Float64x2.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
- addF(simdPackage, "Float64x4.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
- addF(simdPackage, "Float64x8.RoundScaledResidueMasked", opLen2Imm8(ssa.OpRoundScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Int32x4.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.SaturatedAddDotProd", opLen3(ssa.OpSaturatedAddDotProdInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x4.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.SaturatedAddDotProdMasked", opLen4(ssa.OpSaturatedAddDotProdMaskedInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float32x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float32x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float32x16.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
+ addF(simdPackage, "Float64x2.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
+ addF(simdPackage, "Float64x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
+ addF(simdPackage, "Float64x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.Sign", opLen2(ssa.OpSignInt16x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x4.Sign", opLen2(ssa.OpSignInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.Sign", opLen2(ssa.OpSignInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
- addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulate", opLen3_31(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64)
package simd
-/* Absolute */
+/* Abs */
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX
-func (x Int8x16) Absolute() Int8x16
+func (x Int8x16) Abs() Int8x16
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX2
-func (x Int8x32) Absolute() Int8x32
+func (x Int8x32) Abs() Int8x32
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x64) Absolute() Int8x64
+func (x Int8x64) Abs() Int8x64
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX
-func (x Int16x8) Absolute() Int16x8
+func (x Int16x8) Abs() Int16x8
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX2
-func (x Int16x16) Absolute() Int16x16
+func (x Int16x16) Abs() Int16x16
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x32) Absolute() Int16x32
+func (x Int16x32) Abs() Int16x32
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX
-func (x Int32x4) Absolute() Int32x4
+func (x Int32x4) Abs() Int32x4
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX2
-func (x Int32x8) Absolute() Int32x8
+func (x Int32x8) Abs() Int32x8
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x16) Absolute() Int32x16
+func (x Int32x16) Abs() Int32x16
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x2) Absolute() Int64x2
+func (x Int64x2) Abs() Int64x2
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x4) Absolute() Int64x4
+func (x Int64x4) Abs() Int64x4
-// Absolute computes the absolute value of each element.
+// Abs computes the absolute value of each element.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x8) Absolute() Int64x8
+func (x Int64x8) Abs() Int64x8
-/* AbsoluteMasked */
+/* AbsMasked */
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x16) AbsoluteMasked(mask Mask8x16) Int8x16
+func (x Int8x16) AbsMasked(mask Mask8x16) Int8x16
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x32) AbsoluteMasked(mask Mask8x32) Int8x32
+func (x Int8x32) AbsMasked(mask Mask8x32) Int8x32
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSB, CPU Feature: AVX512BW
-func (x Int8x64) AbsoluteMasked(mask Mask8x64) Int8x64
+func (x Int8x64) AbsMasked(mask Mask8x64) Int8x64
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x8) AbsoluteMasked(mask Mask16x8) Int16x8
+func (x Int16x8) AbsMasked(mask Mask16x8) Int16x8
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x16) AbsoluteMasked(mask Mask16x16) Int16x16
+func (x Int16x16) AbsMasked(mask Mask16x16) Int16x16
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSW, CPU Feature: AVX512BW
-func (x Int16x32) AbsoluteMasked(mask Mask16x32) Int16x32
+func (x Int16x32) AbsMasked(mask Mask16x32) Int16x32
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x4) AbsoluteMasked(mask Mask32x4) Int32x4
+func (x Int32x4) AbsMasked(mask Mask32x4) Int32x4
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x8) AbsoluteMasked(mask Mask32x8) Int32x8
+func (x Int32x8) AbsMasked(mask Mask32x8) Int32x8
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSD, CPU Feature: AVX512F
-func (x Int32x16) AbsoluteMasked(mask Mask32x16) Int32x16
+func (x Int32x16) AbsMasked(mask Mask32x16) Int32x16
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x2) AbsoluteMasked(mask Mask64x2) Int64x2
+func (x Int64x2) AbsMasked(mask Mask64x2) Int64x2
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x4) AbsoluteMasked(mask Mask64x4) Int64x4
+func (x Int64x4) AbsMasked(mask Mask64x4) Int64x4
-// AbsoluteMasked computes the absolute value of each element.
+// AbsMasked computes the absolute value of each element.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPABSQ, CPU Feature: AVX512F
-func (x Int64x8) AbsoluteMasked(mask Mask64x8) Int64x8
+func (x Int64x8) AbsMasked(mask Mask64x8) Int64x8
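
// absRef is an editor's scalar sketch (not part of the generated API) of the
// per-lane behavior documented for the Int32x4 form of Abs above (VPABSD):
// each result lane is the absolute value of the corresponding input lane.
// Like the instruction, negating the minimum int32 wraps back to the minimum.
func absRef(x [4]int32) [4]int32 {
	var out [4]int32
	for i, v := range x {
		if v < 0 {
			v = -v // wraps for math.MinInt32, matching the hardware behavior
		}
		out[i] = v
	}
	return out
}
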
/* Add */
// Asm: VPADDQ, CPU Feature: AVX512F
func (x Uint64x8) Add(y Uint64x8) Uint64x8
-/* AddDotProd */
+/* AddDotProdPairsSaturated */
-// AddDotProd performs dot products on pairs of elements of y and z and then adds x.
+// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x.
//
-// Asm: VPDPWSSD, CPU Feature: AVXVNNI
-func (x Int32x4) AddDotProd(y Int16x8, z Int16x8) Int32x4
+// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
+func (x Int32x4) AddDotProdPairsSaturated(y Int16x8, z Int16x8) Int32x4
+
+// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
+func (x Int32x8) AddDotProdPairsSaturated(y Int16x16, z Int16x16) Int32x8
+
+// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x16) AddDotProdPairsSaturated(y Int16x32, z Int16x32) Int32x16
+
+/* AddDotProdPairsSaturatedMasked */
+
+// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x4) AddDotProdPairsSaturatedMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
+
+// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x8) AddDotProdPairsSaturatedMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
+
+// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
+func (x Int32x16) AddDotProdPairsSaturatedMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
+
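// addDotProdPairsSaturatedRef is an editor's scalar sketch (not part of the
// generated API) of the Int32x4 form above: each 32-bit lane accumulates the
// dot product of one pair of adjacent 16-bit lanes of y and z, and the
// accumulation saturates to the int32 range (VPDPWSSDS).
func addDotProdPairsSaturatedRef(x [4]int32, y, z [8]int16) [4]int32 {
	var out [4]int32
	for i := 0; i < 4; i++ {
		sum := int64(x[i]) +
			int64(y[2*i])*int64(z[2*i]) +
			int64(y[2*i+1])*int64(z[2*i+1])
		if sum > 2147483647 { // clamp to math.MaxInt32
			sum = 2147483647
		} else if sum < -2147483648 { // clamp to math.MinInt32
			sum = -2147483648
		}
		out[i] = int32(sum)
	}
	return out
}
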
+/* AddDotProdQuadruple */
+
+// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// Asm: VPDPBUSD, CPU Feature: AVXVNNI
+func (x Int8x16) AddDotProdQuadruple(y Uint8x16, z Int32x4) Int32x4
+
+// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// Asm: VPDPBUSD, CPU Feature: AVXVNNI
+func (x Int8x32) AddDotProdQuadruple(y Uint8x32, z Int32x8) Int32x8
+
+// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
+func (x Int8x64) AddDotProdQuadruple(y Uint8x64, z Int32x16) Int32x16
+
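// addDotProdQuadrupleRef is an editor's scalar sketch (not part of the
// generated API) of the Int8x16 form above: each 32-bit lane of z accumulates
// the dot product of a group of 4 signed bytes from x with 4 unsigned bytes
// from y; the accumulation wraps, while the Saturated variants below clamp.
func addDotProdQuadrupleRef(x [16]int8, y [16]uint8, z [4]int32) [4]int32 {
	var out [4]int32
	for i := 0; i < 4; i++ {
		acc := z[i]
		for j := 0; j < 4; j++ {
			acc += int32(x[4*i+j]) * int32(y[4*i+j])
		}
		out[i] = acc
	}
	return out
}
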
+/* AddDotProdQuadrupleMasked */
+
+// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
+func (x Int8x16) AddDotProdQuadrupleMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
+
+// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
+func (x Int8x32) AddDotProdQuadrupleMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
+
+// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
+func (x Int8x64) AddDotProdQuadrupleMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
+
+/* AddDotProdQuadrupleSaturated */
+
+// AddDotProdQuadrupleSaturated performs dot products on groups of 4 elements of x and y and then adds z.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
+func (x Int8x16) AddDotProdQuadrupleSaturated(y Uint8x16, z Int32x4) Int32x4
-// AddDotProd performs dot products on pairs of elements of y and z and then adds x.
+// AddDotProdQuadrupleSaturated performs dot products on groups of 4 elements of x and y and then adds z.
//
-// Asm: VPDPWSSD, CPU Feature: AVXVNNI
-func (x Int32x8) AddDotProd(y Int16x16, z Int16x16) Int32x8
+// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
+func (x Int8x32) AddDotProdQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8
-// AddDotProd performs dot products on pairs of elements of y and z and then adds x.
+// AddDotProdQuadrupleSaturated performs dot products on groups of 4 elements of x and y and then adds z.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x16) AddDotProd(y Int16x32, z Int16x32) Int32x16
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x64) AddDotProdQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16
-/* AddDotProdMasked */
+/* AddDotProdQuadrupleSaturatedMasked */
-// AddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
+// AddDotProdQuadrupleSaturatedMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x4) AddDotProdMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x16) AddDotProdQuadrupleSaturatedMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
-// AddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
+// AddDotProdQuadrupleSaturatedMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x8) AddDotProdMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x32) AddDotProdQuadrupleSaturatedMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
-// AddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
+// AddDotProdQuadrupleSaturatedMasked performs dot products on groups of 4 elements of x and y and then adds z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPDPWSSD, CPU Feature: AVX512VNNI
-func (x Int32x16) AddDotProdMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
+// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
+func (x Int8x64) AddDotProdQuadrupleSaturatedMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
/* AddMasked */
// AddSaturated adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSB, CPU Feature: AVX
+// Asm: VPADDUSB, CPU Feature: AVX
func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
// AddSaturated adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSB, CPU Feature: AVX2
+// Asm: VPADDUSB, CPU Feature: AVX2
func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
// AddSaturated adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
+// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
// AddSaturated adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSW, CPU Feature: AVX
+// Asm: VPADDUSW, CPU Feature: AVX
func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8
// AddSaturated adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSW, CPU Feature: AVX2
+// Asm: VPADDUSW, CPU Feature: AVX2
func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16
// AddSaturated adds corresponding elements of two vectors with saturation.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
+// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32
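
// addSaturatedU8Ref is an editor's scalar sketch (not part of the generated
// API) of the unsigned saturating add documented above for Uint8x16
// (VPADDUSB): lane sums that exceed the uint8 range clamp to 255 instead of
// wrapping.
func addSaturatedU8Ref(x, y [16]uint8) [16]uint8 {
	var out [16]uint8
	for i := range x {
		s := uint16(x[i]) + uint16(y[i])
		if s > 255 {
			s = 255
		}
		out[i] = uint8(s)
	}
	return out
}
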
/* AddSaturatedMasked */
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
+// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x16) AddSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
+// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x32) AddSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSB, CPU Feature: AVX512BW
+// Asm: VPADDUSB, CPU Feature: AVX512BW
func (x Uint8x64) AddSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
+// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x8) AddSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
+// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x16) AddSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPADDSW, CPU Feature: AVX512BW
+// Asm: VPADDUSW, CPU Feature: AVX512BW
func (x Uint16x32) AddSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
/* AddSub */
// Asm: VPANDNQ, CPU Feature: AVX512F
func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
-/* ApproximateReciprocal */
-
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCPPS, CPU Feature: AVX
-func (x Float32x4) ApproximateReciprocal() Float32x4
-
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCPPS, CPU Feature: AVX
-func (x Float32x8) ApproximateReciprocal() Float32x8
-
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocal() Float32x16
-
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocal() Float64x2
-
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocal() Float64x4
-
-// ApproximateReciprocal computes an approximate reciprocal of each element.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocal() Float64x8
-
-/* ApproximateReciprocalMasked */
-
-// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x4) ApproximateReciprocalMasked(mask Mask32x4) Float32x4
-
-// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x8) ApproximateReciprocalMasked(mask Mask32x8) Float32x8
-
-// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocalMasked(mask Mask32x16) Float32x16
-
-// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocalMasked(mask Mask64x2) Float64x2
-
-// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocalMasked(mask Mask64x4) Float64x4
-
-// ApproximateReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocalMasked(mask Mask64x8) Float64x8
-
-/* ApproximateReciprocalOfSqrt */
-
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
-//
-// Asm: VRSQRTPS, CPU Feature: AVX
-func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4
-
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
-//
-// Asm: VRSQRTPS, CPU Feature: AVX
-func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8
-
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocalOfSqrt() Float32x16
-
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2
-
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4
-
-// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8
-
-/* ApproximateReciprocalOfSqrtMasked */
-
-// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x4) ApproximateReciprocalOfSqrtMasked(mask Mask32x4) Float32x4
-
-// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x8) ApproximateReciprocalOfSqrtMasked(mask Mask32x8) Float32x8
-
-// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512F
-func (x Float32x16) ApproximateReciprocalOfSqrtMasked(mask Mask32x16) Float32x16
-
-// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x2) ApproximateReciprocalOfSqrtMasked(mask Mask64x2) Float64x2
-
-// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x4) ApproximateReciprocalOfSqrtMasked(mask Mask64x4) Float64x4
-
-// ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512F
-func (x Float64x8) ApproximateReciprocalOfSqrtMasked(mask Mask64x8) Float64x8
-
/* Average */
// Average computes the rounded average of corresponding elements.
// Asm: VCVTPS2UDQ, CPU Feature: AVX512F
func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16
+/* CopySign */
+
+// CopySign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNB, CPU Feature: AVX
+func (x Int8x16) CopySign(y Int8x16) Int8x16
+
+// CopySign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNB, CPU Feature: AVX2
+func (x Int8x32) CopySign(y Int8x32) Int8x32
+
+// CopySign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNW, CPU Feature: AVX
+func (x Int16x8) CopySign(y Int16x8) Int16x8
+
+// CopySign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGNW, CPU Feature: AVX2
+func (x Int16x16) CopySign(y Int16x16) Int16x16
+
+// CopySign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGND, CPU Feature: AVX
+func (x Int32x4) CopySign(y Int32x4) Int32x4
+
+// CopySign returns the product of the first operand with -1, 0, or 1,
+// whichever constant is nearest to the value of the second operand.
+//
+// Asm: VPSIGND, CPU Feature: AVX2
+func (x Int32x8) CopySign(y Int32x8) Int32x8
+
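// Illustrative sketch (hypothetical helper, assumes these methods are used from
// a package imported as "simd"): CopySign applies the sign of s to v lane by
// lane; negative lanes of s negate v, zero lanes zero it, positive lanes pass
// v through unchanged.
func applySign(v, s simd.Int32x8) simd.Int32x8 {
	return v.CopySign(s)
}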
/* Div */
// Div divides elements of two vectors.
// Asm: VDIVPD, CPU Feature: AVX512F
func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8
-/* DotProdBroadcast */
+/* DotProdPairs */
+
+// DotProdPairs multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX
+func (x Int16x8) DotProdPairs(y Int16x8) Int32x4
+
+// DotProdPairs multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX2
+func (x Int16x16) DotProdPairs(y Int16x16) Int32x8
+
+// DotProdPairs multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x32) DotProdPairs(y Int16x32) Int32x16
+
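// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): DotProdPairs halves the lane count, so eight int16 lanes become
// four int32 lanes, each holding x[2i]*y[2i] + x[2i+1]*y[2i+1].
func pairDots(x, y simd.Int16x8) simd.Int32x4 {
	return x.DotProdPairs(y)
}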
+/* DotProdPairsMasked */
+
+// DotProdPairsMasked multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x8) DotProdPairsMasked(y Int16x8, mask Mask16x8) Int32x4
+
+// DotProdPairsMasked multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x16) DotProdPairsMasked(y Int16x16, mask Mask16x16) Int32x8
+
+// DotProdPairsMasked multiplies the elements and adds the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMADDWD, CPU Feature: AVX512BW
+func (x Int16x32) DotProdPairsMasked(y Int16x32, mask Mask16x32) Int32x16
+
+/* DotProdPairsSaturated */
+
+// DotProdPairsSaturated multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX
+func (x Uint8x16) DotProdPairsSaturated(y Int8x16) Int16x8
+
+// DotProdPairsSaturated multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX2
+func (x Uint8x32) DotProdPairsSaturated(y Int8x32) Int16x16
+
+// DotProdPairsSaturated multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
+func (x Uint8x64) DotProdPairsSaturated(y Int8x64) Int16x32
+
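// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): the saturated form pairs unsigned bytes with signed bytes and
// clamps each pair sum to the int16 range instead of wrapping.
func bytePairDots(u simd.Uint8x16, s simd.Int8x16) simd.Int16x8 {
	return u.DotProdPairsSaturated(s)
}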
+/* DotProdPairsSaturatedMasked */
-// DotProdBroadcast multiplies all elements and broadcasts the sum.
+// DotProdPairsSaturatedMasked multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// This operation is applied selectively under a write mask.
//
-// Asm: VDPPS, CPU Feature: AVX
-func (x Float32x4) DotProdBroadcast(y Float32x4) Float32x4
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
+func (x Uint8x16) DotProdPairsSaturatedMasked(y Int8x16, mask Mask16x8) Int16x8
-// DotProdBroadcast multiplies all elements and broadcasts the sum.
+// DotProdPairsSaturatedMasked multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// This operation is applied selectively under a write mask.
//
-// Asm: VDPPS, CPU Feature: AVX
-func (x Float32x8) DotProdBroadcast(y Float32x8) Float32x8
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
+func (x Uint8x32) DotProdPairsSaturatedMasked(y Int8x32, mask Mask16x16) Int16x16
-// DotProdBroadcast multiplies all elements and broadcasts the sum.
+// DotProdPairsSaturatedMasked multiplies the elements and adds the pairs together with saturation,
+// yielding a vector of half as many elements with twice the input element size.
+//
+// This operation is applied selectively under a write mask.
//
-// Asm: VDPPD, CPU Feature: AVX
-func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
+// Asm: VPMADDUBSW, CPU Feature: AVX512BW
+func (x Uint8x64) DotProdPairsSaturatedMasked(y Int8x64, mask Mask16x32) Int16x32
/* Equal */
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x8) FloorScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-/* FusedMultiplyAdd */
-
-// FusedMultiplyAdd performs (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAdd(y Float32x4, z Float32x4) Float32x4
-
-// FusedMultiplyAdd performs (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAdd(y Float32x8, z Float32x8) Float32x8
-
-// FusedMultiplyAdd performs (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAdd(y Float32x16, z Float32x16) Float32x16
-
-// FusedMultiplyAdd performs (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAdd(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplyAdd performs (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplyAdd performs (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8
-
-/* FusedMultiplyAddMasked */
-
-// FusedMultiplyAddMasked performs (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// FusedMultiplyAddMasked performs (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// FusedMultiplyAddMasked performs (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// FusedMultiplyAddMasked performs (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// FusedMultiplyAddMasked performs (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// FusedMultiplyAddMasked performs (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* FusedMultiplyAddSub */
-
-// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAddSub(y Float32x4, z Float32x4) Float32x4
-
-// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAddSub(y Float32x8, z Float32x8) Float32x8
-
-// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAddSub(y Float32x16, z Float32x16) Float32x16
-
-// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAddSub(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplyAddSub performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8
-
-/* FusedMultiplyAddSubMasked */
-
-// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* FusedMultiplySubAdd */
-
-// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplySubAdd(y Float32x4, z Float32x4) Float32x4
-
-// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplySubAdd(y Float32x8, z Float32x8) Float32x8
-
-// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplySubAdd(y Float32x16, z Float32x16) Float32x16
-
-// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplySubAdd(y Float64x2, z Float64x2) Float64x2
-
-// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4
-
-// FusedMultiplySubAdd performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
-
-/* FusedMultiplySubAddMasked */
-
-// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
-func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
-func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* GaloisFieldAffineTransform */
+/* GaloisFieldAffineTransform */
// GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
// Asm: VPMULLQ, CPU Feature: AVX512DQ
func (x Int64x8) Mul(y Int64x8) Int64x8
-/* MulEvenWiden */
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Uint16x8) Mul(y Uint16x8) Uint16x8
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VPMULDQ, CPU Feature: AVX
-func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Uint16x16) Mul(y Uint16x16) Uint16x16
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VPMULDQ, CPU Feature: AVX2
-func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Uint16x32) Mul(y Uint16x32) Uint16x32
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x2) MulEvenWiden(y Int64x2) Int64x2
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Uint32x4) Mul(y Uint32x4) Uint32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x4) MulEvenWiden(y Int64x4) Int64x4
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Uint32x8) Mul(y Uint32x8) Uint32x8
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x8) MulEvenWiden(y Int64x8) Int64x8
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Uint32x16) Mul(y Uint32x16) Uint32x16
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VPMULUDQ, CPU Feature: AVX
-func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Uint64x2) Mul(y Uint64x2) Uint64x2
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
//
-// Asm: VPMULUDQ, CPU Feature: AVX2
-func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Uint64x4) Mul(y Uint64x4) Uint64x4
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Mul multiplies corresponding elements of two vectors.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Uint64x8) Mul(y Uint64x8) Uint64x8
+
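// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): Mul is a plain lane-wise product; for 32-bit lanes only the low
// 32 bits of each product are kept.
func scaleLanes(a, b simd.Uint32x8) simd.Uint32x8 {
	return a.Mul(b)
}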
+/* MulAdd */
+
+// MulAdd performs a fused (x * y) + z.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x2) MulEvenWiden(y Uint64x2) Uint64x2
+// Asm: VFMADD213PS, CPU Feature: AVX512F
+func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAdd performs a fused (x * y) + z.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4
+// Asm: VFMADD213PS, CPU Feature: AVX512F
+func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAdd performs a fused (x * y) + z.
+//
+// Asm: VFMADD213PS, CPU Feature: AVX512F
+func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
+
+// MulAdd performs a fused (x * y) + z.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8
+// Asm: VFMADD213PD, CPU Feature: AVX512F
+func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
-/* MulEvenWidenMasked */
+// MulAdd performs a fused (x * y) + z.
+//
+// Asm: VFMADD213PD, CPU Feature: AVX512F
+func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
-// MulEvenWidenMasked multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAdd performs a fused (x * y) + z.
+//
+// Asm: VFMADD213PD, CPU Feature: AVX512F
+func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
+
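// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): MulAdd computes x*y + z in each lane with a single rounding step,
// the usual building block for dot products and polynomial evaluation.
func fma64(x, y, z simd.Float64x4) simd.Float64x4 {
	return x.MulAdd(y, z)
}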
+/* MulAddMasked */
+
+// MulAddMasked performs a fused (x * y) + z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x2) MulEvenWidenMasked(y Int64x2, mask Mask64x2) Int64x2
+// Asm: VFMADD213PS, CPU Feature: AVX512F
+func (x Float32x4) MulAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-// MulEvenWidenMasked multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAddMasked performs a fused (x * y) + z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x4) MulEvenWidenMasked(y Int64x4, mask Mask64x4) Int64x4
+// Asm: VFMADD213PS, CPU Feature: AVX512F
+func (x Float32x8) MulAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-// MulEvenWidenMasked multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAddMasked performs a fused (x * y) + z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULDQ, CPU Feature: AVX512F
-func (x Int64x8) MulEvenWidenMasked(y Int64x8, mask Mask64x8) Int64x8
+// Asm: VFMADD213PS, CPU Feature: AVX512F
+func (x Float32x16) MulAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-// MulEvenWidenMasked multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAddMasked performs a fused (x * y) + z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VFMADD213PD, CPU Feature: AVX512F
+func (x Float64x2) MulAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-// MulEvenWidenMasked multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAddMasked performs a fused (x * y) + z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, mask Mask64x4) Uint64x4
+// Asm: VFMADD213PD, CPU Feature: AVX512F
+func (x Float64x4) MulAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-// MulEvenWidenMasked multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// MulAddMasked performs a fused (x * y) + z.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULUDQ, CPU Feature: AVX512F
-func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, mask Mask64x8) Uint64x8
+// Asm: VFMADD213PD, CPU Feature: AVX512F
+func (x Float64x8) MulAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-/* MulHigh */
+/* MulAddSub */
-// MulHigh multiplies elements and stores the high part of the result.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
-// Asm: VPMULHW, CPU Feature: AVX
-func (x Int16x8) MulHigh(y Int16x8) Int16x8
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
+func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
-// MulHigh multiplies elements and stores the high part of the result.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
-// Asm: VPMULHW, CPU Feature: AVX2
-func (x Int16x16) MulHigh(y Int16x16) Int16x16
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
+func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
-// MulHigh multiplies elements and stores the high part of the result.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
-// Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x32) MulHigh(y Int16x32) Int16x32
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
+func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
-// MulHigh multiplies elements and stores the high part of the result.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
-// Asm: VPMULHUW, CPU Feature: AVX
-func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
+func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
-// MulHigh multiplies elements and stores the high part of the result.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
-// Asm: VPMULHUW, CPU Feature: AVX2
-func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
+func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
-// MulHigh multiplies elements and stores the high part of the result.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
-// Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
+func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
-/* MulHighMasked */
+/* MulAddSubMasked */
-// MulHighMasked multiplies elements and stores the high part of the result.
+// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
+func (x Float32x4) MulAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-// MulHighMasked multiplies elements and stores the high part of the result.
+// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
+func (x Float32x8) MulAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-// MulHighMasked multiplies elements and stores the high part of the result.
+// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULHW, CPU Feature: AVX512BW
-func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512F
+func (x Float32x16) MulAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
+
+// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
+func (x Float64x2) MulAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
+
+// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
+func (x Float64x4) MulAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
+
+// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512F
+func (x Float64x8) MulAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
+
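// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): MulAddSub alternates between adding and subtracting z across lanes
// as described above, which is convenient for interleaved real/imaginary data.
func mulAddSub(x, y, z simd.Float32x4) simd.Float32x4 {
	return x.MulAddSub(y, z)
}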
+/* MulEvenWiden */
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULDQ, CPU Feature: AVX
+func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULDQ, CPU Feature: AVX2
+func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX
+func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
+//
+// Asm: VPMULUDQ, CPU Feature: AVX2
+func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
+
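// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): MulEvenWiden reads only the even-indexed 32-bit lanes and returns
// their full 64-bit products, so no precision is lost.
func widenEvens(a, b simd.Uint32x4) simd.Uint64x2 {
	return a.MulEvenWiden(b)
}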
+/* MulHigh */
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHUW, CPU Feature: AVX
+func (x Int16x8) MulHigh(y Int16x8) Int16x8
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHUW, CPU Feature: AVX2
+func (x Int16x16) MulHigh(y Int16x16) Int16x16
+
+// MulHigh multiplies elements and stores the high part of the result.
+//
+// Asm: VPMULHW, CPU Feature: AVX512BW
+func (x Int16x32) MulHigh(y Int16x32) Int16x32
+
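// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): MulHigh keeps the upper 16 bits of each 32-bit product, the part
// that Mul discards.
func highHalves(a, b simd.Int16x32) simd.Int16x32 {
	return a.MulHigh(b)
}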
+/* MulHighMasked */
// MulHighMasked multiplies elements and stores the high part of the result.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8
+func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8
// MulHighMasked multiplies elements and stores the high part of the result.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16
+// Asm: VPMULHW, CPU Feature: AVX512BW
+func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16
// MulHighMasked multiplies elements and stores the high part of the result.
//
// This operation is applied selectively under a write mask.
//
// Asm: VPMULHUW, CPU Feature: AVX512BW
-func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32
+func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32
/* MulMasked */
// Asm: VPMULLQ, CPU Feature: AVX512DQ
func (x Int64x8) MulMasked(y Int64x8, mask Mask64x8) Int64x8
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Uint16x8) MulMasked(y Uint16x8, mask Mask16x8) Uint16x8
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Uint16x16) MulMasked(y Uint16x16, mask Mask16x16) Uint16x16
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLW, CPU Feature: AVX512BW
+func (x Uint16x32) MulMasked(y Uint16x32, mask Mask16x32) Uint16x32
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Uint32x4) MulMasked(y Uint32x4, mask Mask32x4) Uint32x4
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Uint32x8) MulMasked(y Uint32x8, mask Mask32x8) Uint32x8
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLD, CPU Feature: AVX512F
+func (x Uint32x16) MulMasked(y Uint32x16, mask Mask32x16) Uint32x16
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Uint64x2) MulMasked(y Uint64x2, mask Mask64x2) Uint64x2
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Uint64x4) MulMasked(y Uint64x4, mask Mask64x4) Uint64x4
+
+// MulMasked multiplies corresponding elements of two vectors.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPMULLQ, CPU Feature: AVX512DQ
+func (x Uint64x8) MulMasked(y Uint64x8, mask Mask64x8) Uint64x8
+
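// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): the Masked variant multiplies only the lanes selected by m; what
// the unselected lanes hold is governed by the write-mask semantics noted above.
func mulWhere(a, b simd.Uint32x8, m simd.Mask32x8) simd.Uint32x8 {
	return a.MulMasked(b, m)
}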
+/* MulSubAdd */
+
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
+func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4
+
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
+func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8
+
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
+func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16
+
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
+func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2
+
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
+func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4
+
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
+func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
+
+/* MulSubAddMasked */
+
+// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
+func (x Float32x4) MulSubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
+
+// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
+func (x Float32x8) MulSubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
+
+// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512F
+func (x Float32x16) MulSubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
+
+// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
+func (x Float64x2) MulSubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
+
+// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
+func (x Float64x4) MulSubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
+
+// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512F
+func (x Float64x8) MulSubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
+
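// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): MulSubAdd mirrors MulAddSub, swapping which lane parity adds z and
// which subtracts it.
func mulSubAdd(x, y, z simd.Float64x2) simd.Float64x2 {
	return x.MulSubAdd(y, z)
}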
/* NotEqual */
// NotEqual compares for inequality.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VPCMPB, CPU Feature: AVX512BW
+func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPB, CPU Feature: AVX512BW
+func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPW, CPU Feature: AVX512BW
+func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPW, CPU Feature: AVX512BW
+func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPW, CPU Feature: AVX512BW
+func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPD, CPU Feature: AVX512F
+func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPD, CPU Feature: AVX512F
+func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPD, CPU Feature: AVX512F
+func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512F
+func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512F
+func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPQ, CPU Feature: AVX512F
+func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512BW
+func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512BW
+func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUB, CPU Feature: AVX512BW
+func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512BW
+func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512BW
+func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUW, CPU Feature: AVX512BW
+func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512F
+func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512F
+func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUD, CPU Feature: AVX512F
+func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512F
+func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512F
+func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
+
+// NotEqualMasked compares for inequality.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPCMPUQ, CPU Feature: AVX512F
+func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
+
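// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): NotEqualMasked yields a mask of lanes whose values differ,
// restricted to the lanes already enabled in m.
func changedLanes(a, b simd.Int32x8, m simd.Mask32x8) simd.Mask32x8 {
	return a.NotEqualMasked(b, m)
}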
+/* OnesCount */
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x16) OnesCount() Int8x16
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x32) OnesCount() Int8x32
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x64) OnesCount() Int8x64
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x8) OnesCount() Int16x8
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x16) OnesCount() Int16x16
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x32) OnesCount() Int16x32
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x4) OnesCount() Int32x4
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x8) OnesCount() Int32x8
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x16) OnesCount() Int32x16
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x2) OnesCount() Int64x2
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x4) OnesCount() Int64x4
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x8) OnesCount() Int64x8
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x16) OnesCount() Uint8x16
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x32) OnesCount() Uint8x32
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x64) OnesCount() Uint8x64
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x8) OnesCount() Uint16x8
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x16) OnesCount() Uint16x16
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x32) OnesCount() Uint16x32
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x4) OnesCount() Uint32x4
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x8) OnesCount() Uint32x8
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x16) OnesCount() Uint32x16
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x2) OnesCount() Uint64x2
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x4) OnesCount() Uint64x4
+
+// OnesCount counts the number of set bits in each element.
+//
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x8) OnesCount() Uint64x8
+
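// A minimal sketch (hypothetical helper, assuming a package imported as
// "simd"): OnesCount is a per-lane population count; each byte lane ends up
// holding a value between 0 and 8.
func popcntLanes(v simd.Uint8x16) simd.Uint8x16 {
	return v.OnesCount()
}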
+/* OnesCountMasked */
+
+// OnesCountMasked counts the number of set bits in each element.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x16) OnesCountMasked(mask Mask8x16) Int8x16
+
+// OnesCountMasked counts the number of set bits in each element.
+//
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x32) OnesCountMasked(mask Mask8x32) Int8x32
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPB, CPU Feature: AVX512BW
-func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x64) OnesCountMasked(mask Mask8x64) Int8x64
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x8) OnesCountMasked(mask Mask16x8) Int16x8
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x16) OnesCountMasked(mask Mask16x16) Int16x16
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPW, CPU Feature: AVX512BW
-func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x32) OnesCountMasked(mask Mask16x32) Int16x32
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x4) OnesCountMasked(mask Mask32x4) Int32x4
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x8) OnesCountMasked(mask Mask32x8) Int32x8
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPD, CPU Feature: AVX512F
-func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x16) OnesCountMasked(mask Mask32x16) Int32x16
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x2) OnesCountMasked(mask Mask64x2) Int64x2
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x4) OnesCountMasked(mask Mask64x4) Int64x4
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPQ, CPU Feature: AVX512F
-func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x8) OnesCountMasked(mask Mask64x8) Int64x8
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x16) OnesCountMasked(mask Mask8x16) Uint8x16
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x32) OnesCountMasked(mask Mask8x32) Uint8x32
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUB, CPU Feature: AVX512BW
-func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x64) OnesCountMasked(mask Mask8x64) Uint8x64
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x8) OnesCountMasked(mask Mask16x8) Uint16x8
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x16) OnesCountMasked(mask Mask16x16) Uint16x16
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUW, CPU Feature: AVX512BW
-func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x32) OnesCountMasked(mask Mask16x32) Uint16x32
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x4) OnesCountMasked(mask Mask32x4) Uint32x4
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x8) OnesCountMasked(mask Mask32x8) Uint32x8
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUD, CPU Feature: AVX512F
-func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x16) OnesCountMasked(mask Mask32x16) Uint32x16
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x2) OnesCountMasked(mask Mask64x2) Uint64x2
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x4) OnesCountMasked(mask Mask64x4) Uint64x4
-// NotEqualMasked compares for inequality.
+// OnesCountMasked counts the number of set bits in each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPCMPUQ, CPU Feature: AVX512F
-func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x8) OnesCountMasked(mask Mask64x8) Uint64x8
/* Or */
// Asm: VPORQ, CPU Feature: AVX512F
func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8
-/* PairDotProd */
-
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX
-func (x Int16x8) PairDotProd(y Int16x8) Int32x4
-
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX2
-func (x Int16x16) PairDotProd(y Int16x16) Int32x8
-
-// PairDotProd multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x32) PairDotProd(y Int16x32) Int32x16
-
-/* PairDotProdMasked */
-
-// PairDotProdMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x8) PairDotProdMasked(y Int16x8, mask Mask16x8) Int32x4
-
-// PairDotProdMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x16) PairDotProdMasked(y Int16x16, mask Mask16x16) Int32x8
-
-// PairDotProdMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512BW
-func (x Int16x32) PairDotProdMasked(y Int16x32, mask Mask16x32) Int32x16
-
/* Permute */
// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
// Only the needed bits to represent x's index are used in indices' elements.
//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512F
-func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512F
-func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPD, CPU Feature: AVX512F
-func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPD, CPU Feature: AVX512F
-func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512F
-func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8
-
-/* PopCount */
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) PopCount() Int8x16
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) PopCount() Int8x32
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) PopCount() Int8x64
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) PopCount() Int16x8
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) PopCount() Int16x16
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) PopCount() Int16x32
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) PopCount() Int32x4
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) PopCount() Int32x8
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) PopCount() Int32x16
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) PopCount() Int64x2
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) PopCount() Int64x4
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) PopCount() Int64x8
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) PopCount() Uint8x16
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) PopCount() Uint8x32
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) PopCount() Uint8x64
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) PopCount() Uint16x8
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) PopCount() Uint16x16
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) PopCount() Uint16x32
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) PopCount() Uint32x4
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) PopCount() Uint32x8
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) PopCount() Uint32x16
-
-// PopCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) PopCount() Uint64x2
-
-// PopCount counts the number of set bits in each element.
+// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) PopCount() Uint64x4
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16
-// PopCount counts the number of set bits in each element.
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) PopCount() Uint64x8
-
-/* PopCountMasked */
+// This operation is applied selectively under a write mask.
+//
+// Asm: VPERMD, CPU Feature: AVX512F
+func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16
-// PopCountMasked counts the number of set bits in each element.
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) PopCountMasked(mask Mask8x16) Int8x16
+// Asm: VPERMPD, CPU Feature: AVX512F
+func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4
-// PopCountMasked counts the number of set bits in each element.
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) PopCountMasked(mask Mask8x32) Int8x32
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4
-// PopCountMasked counts the number of set bits in each element.
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) PopCountMasked(mask Mask8x64) Int8x64
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4
-// PopCountMasked counts the number of set bits in each element.
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) PopCountMasked(mask Mask16x8) Int16x8
+// Asm: VPERMPD, CPU Feature: AVX512F
+func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8
-// PopCountMasked counts the number of set bits in each element.
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) PopCountMasked(mask Mask16x16) Int16x16
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8
-// PopCountMasked counts the number of set bits in each element.
+// PermuteMasked performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) PopCountMasked(mask Mask16x32) Int16x32
+// Asm: VPERMQ, CPU Feature: AVX512F
+func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8
+
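
// A scalar sketch of the permutation documented above (illustrative only,
// not the package implementation): each result lane is drawn from x at the
// position given by the corresponding index, using only the low bits needed
// to address x's 16 lanes. The write-mask handling of PermuteMasked is omitted.
func permute32x16Ref(x, indices [16]uint32) (out [16]uint32) {
	for i := range out {
		out[i] = x[indices[i]&15] // 4 index bits select among 16 lanes
	}
	return out
}
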
+/* Reciprocal */
-// PopCountMasked counts the number of set bits in each element.
+// Reciprocal computes an approximate reciprocal of each element.
//
-// This operation is applied selectively under a write mask.
+// Asm: VRCPPS, CPU Feature: AVX
+func (x Float32x4) Reciprocal() Float32x4
+
+// Reciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) PopCountMasked(mask Mask32x4) Int32x4
+// Asm: VRCPPS, CPU Feature: AVX
+func (x Float32x8) Reciprocal() Float32x8
-// PopCountMasked counts the number of set bits in each element.
+// Reciprocal computes an approximate reciprocal of each element.
//
-// This operation is applied selectively under a write mask.
+// Asm: VRCP14PS, CPU Feature: AVX512F
+func (x Float32x16) Reciprocal() Float32x16
+
+// Reciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) PopCountMasked(mask Mask32x8) Int32x8
+// Asm: VRCP14PD, CPU Feature: AVX512F
+func (x Float64x2) Reciprocal() Float64x2
-// PopCountMasked counts the number of set bits in each element.
+// Reciprocal computes an approximate reciprocal of each element.
//
-// This operation is applied selectively under a write mask.
+// Asm: VRCP14PD, CPU Feature: AVX512F
+func (x Float64x4) Reciprocal() Float64x4
+
+// Reciprocal computes an approximate reciprocal of each element.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) PopCountMasked(mask Mask32x16) Int32x16
+// Asm: VRCP14PD, CPU Feature: AVX512F
+func (x Float64x8) Reciprocal() Float64x8
+
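
// A scalar reference for Reciprocal (illustrative only): the SIMD forms
// (VRCPPS, VRCP14PS, VRCP14PD) return hardware approximations of 1/x, so
// their results may differ from this exact division in the low-order bits.
func reciprocalRef(x []float32) []float32 {
	out := make([]float32, len(x))
	for i, v := range x {
		out[i] = 1 / v // exact per-element reciprocal; the instruction only approximates it
	}
	return out
}
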
+/* ReciprocalMasked */
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) PopCountMasked(mask Mask64x2) Int64x2
+// Asm: VRCP14PS, CPU Feature: AVX512F
+func (x Float32x4) ReciprocalMasked(mask Mask32x4) Float32x4
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) PopCountMasked(mask Mask64x4) Int64x4
+// Asm: VRCP14PS, CPU Feature: AVX512F
+func (x Float32x8) ReciprocalMasked(mask Mask32x8) Float32x8
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) PopCountMasked(mask Mask64x8) Int64x8
+// Asm: VRCP14PS, CPU Feature: AVX512F
+func (x Float32x16) ReciprocalMasked(mask Mask32x16) Float32x16
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) PopCountMasked(mask Mask8x16) Uint8x16
+// Asm: VRCP14PD, CPU Feature: AVX512F
+func (x Float64x2) ReciprocalMasked(mask Mask64x2) Float64x2
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) PopCountMasked(mask Mask8x32) Uint8x32
+// Asm: VRCP14PD, CPU Feature: AVX512F
+func (x Float64x4) ReciprocalMasked(mask Mask64x4) Float64x4
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalMasked computes an approximate reciprocal of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) PopCountMasked(mask Mask8x64) Uint8x64
+// Asm: VRCP14PD, CPU Feature: AVX512F
+func (x Float64x8) ReciprocalMasked(mask Mask64x8) Float64x8
-// PopCountMasked counts the number of set bits in each element.
+/* ReciprocalSqrt */
+
+// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
-// This operation is applied selectively under a write mask.
+// Asm: VRSQRTPS, CPU Feature: AVX
+func (x Float32x4) ReciprocalSqrt() Float32x4
+
+// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) PopCountMasked(mask Mask16x8) Uint16x8
+// Asm: VRSQRTPS, CPU Feature: AVX
+func (x Float32x8) ReciprocalSqrt() Float32x8
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
-// This operation is applied selectively under a write mask.
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
+func (x Float32x16) ReciprocalSqrt() Float32x16
+
+// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) PopCountMasked(mask Mask16x16) Uint16x16
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
+func (x Float64x2) ReciprocalSqrt() Float64x2
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
-// This operation is applied selectively under a write mask.
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
+func (x Float64x4) ReciprocalSqrt() Float64x4
+
+// ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) PopCountMasked(mask Mask16x32) Uint16x32
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
+func (x Float64x8) ReciprocalSqrt() Float64x8
-// PopCountMasked counts the number of set bits in each element.
+/* ReciprocalSqrtMasked */
+
+// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) PopCountMasked(mask Mask32x4) Uint32x4
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
+func (x Float32x4) ReciprocalSqrtMasked(mask Mask32x4) Float32x4
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) PopCountMasked(mask Mask32x8) Uint32x8
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
+func (x Float32x8) ReciprocalSqrtMasked(mask Mask32x8) Float32x8
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) PopCountMasked(mask Mask32x16) Uint32x16
+// Asm: VRSQRT14PS, CPU Feature: AVX512F
+func (x Float32x16) ReciprocalSqrtMasked(mask Mask32x16) Float32x16
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) PopCountMasked(mask Mask64x2) Uint64x2
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
+func (x Float64x2) ReciprocalSqrtMasked(mask Mask64x2) Float64x2
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) PopCountMasked(mask Mask64x4) Uint64x4
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
+func (x Float64x4) ReciprocalSqrtMasked(mask Mask64x4) Float64x4
-// PopCountMasked counts the number of set bits in each element.
+// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) PopCountMasked(mask Mask64x8) Uint64x8
+// Asm: VRSQRT14PD, CPU Feature: AVX512F
+func (x Float64x8) ReciprocalSqrtMasked(mask Mask64x8) Float64x8
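
// A scalar reference for ReciprocalSqrt (illustrative only; assumes:
// import "math"): the SIMD forms (VRSQRTPS, VRSQRT14PS, VRSQRT14PD) return
// hardware approximations of 1/sqrt(x) rather than this exact value.
func reciprocalSqrtRef(x []float64) []float64 {
	out := make([]float64, len(x))
	for i, v := range x {
		out[i] = 1 / math.Sqrt(v)
	}
	return out
}
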
/* RotateAllLeft */
// Asm: VPRORVQ, CPU Feature: AVX512F
func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
-/* Round */
+/* RoundToEven */
-// Round rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x4) Round() Float32x4
+func (x Float32x4) RoundToEven() Float32x4
-// Round rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPS, CPU Feature: AVX
-func (x Float32x8) Round() Float32x8
+func (x Float32x8) RoundToEven() Float32x8
-// Round rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x2) Round() Float64x2
+func (x Float64x2) RoundToEven() Float64x2
-// Round rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPD, CPU Feature: AVX
-func (x Float64x4) Round() Float64x4
+func (x Float64x4) RoundToEven() Float64x4
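
// A scalar reference for RoundToEven, matching math.RoundToEven's
// round-half-to-even behavior (assumes: import "math").
func roundToEvenRef(x []float64) []float64 {
	out := make([]float64, len(x))
	for i, v := range x {
		out[i] = math.RoundToEven(v) // e.g. 0.5 -> 0, 1.5 -> 2, 2.5 -> 2
	}
	return out
}
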
-/* RoundScaled */
+/* RoundToEvenScaled */
-// RoundScaled rounds elements with specified precision.
+// RoundToEvenScaled rounds elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) RoundScaled(prec uint8) Float32x4
+func (x Float32x4) RoundToEvenScaled(prec uint8) Float32x4
-// RoundScaled rounds elements with specified precision.
+// RoundToEvenScaled rounds elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) RoundScaled(prec uint8) Float32x8
+func (x Float32x8) RoundToEvenScaled(prec uint8) Float32x8
-// RoundScaled rounds elements with specified precision.
+// RoundToEvenScaled rounds elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) RoundScaled(prec uint8) Float32x16
+func (x Float32x16) RoundToEvenScaled(prec uint8) Float32x16
-// RoundScaled rounds elements with specified precision.
+// RoundToEvenScaled rounds elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) RoundScaled(prec uint8) Float64x2
+func (x Float64x2) RoundToEvenScaled(prec uint8) Float64x2
-// RoundScaled rounds elements with specified precision.
+// RoundToEvenScaled rounds elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) RoundScaled(prec uint8) Float64x4
+func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4
-// RoundScaled rounds elements with specified precision.
+// RoundToEvenScaled rounds elements with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) RoundScaled(prec uint8) Float64x8
+func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8
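
// A sketch of one reading of the scaled rounding above, assuming (per the
// VRNDSCALE instruction family) that prec is the number of binary fraction
// bits to keep: x is rounded to the nearest multiple of 2^-prec, ties to
// even (assumes: import "math").
func roundToEvenScaledRef(x float64, prec uint8) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return math.RoundToEven(x*scale) / scale
}
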
-/* RoundScaledMasked */
+/* RoundToEvenScaledMasked */
-// RoundScaledMasked rounds elements with specified precision.
+// RoundToEvenScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x4) RoundScaledMasked(prec uint8, mask Mask32x4) Float32x4
+func (x Float32x4) RoundToEvenScaledMasked(prec uint8, mask Mask32x4) Float32x4
-// RoundScaledMasked rounds elements with specified precision.
+// RoundToEvenScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x8) RoundScaledMasked(prec uint8, mask Mask32x8) Float32x8
+func (x Float32x8) RoundToEvenScaledMasked(prec uint8, mask Mask32x8) Float32x8
-// RoundScaledMasked rounds elements with specified precision.
+// RoundToEvenScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPS, CPU Feature: AVX512F
-func (x Float32x16) RoundScaledMasked(prec uint8, mask Mask32x16) Float32x16
+func (x Float32x16) RoundToEvenScaledMasked(prec uint8, mask Mask32x16) Float32x16
-// RoundScaledMasked rounds elements with specified precision.
+// RoundToEvenScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x2) RoundScaledMasked(prec uint8, mask Mask64x2) Float64x2
+func (x Float64x2) RoundToEvenScaledMasked(prec uint8, mask Mask64x2) Float64x2
-// RoundScaledMasked rounds elements with specified precision.
+// RoundToEvenScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x4) RoundScaledMasked(prec uint8, mask Mask64x4) Float64x4
+func (x Float64x4) RoundToEvenScaledMasked(prec uint8, mask Mask64x4) Float64x4
-// RoundScaledMasked rounds elements with specified precision.
+// RoundToEvenScaledMasked rounds elements with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VRNDSCALEPD, CPU Feature: AVX512F
-func (x Float64x8) RoundScaledMasked(prec uint8, mask Mask64x8) Float64x8
+func (x Float64x8) RoundToEvenScaledMasked(prec uint8, mask Mask64x8) Float64x8
-/* RoundScaledResidue */
+/* RoundToEvenScaledResidue */
-// RoundScaledResidue computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) RoundScaledResidue(prec uint8) Float32x4
+func (x Float32x4) RoundToEvenScaledResidue(prec uint8) Float32x4
-// RoundScaledResidue computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) RoundScaledResidue(prec uint8) Float32x8
+func (x Float32x8) RoundToEvenScaledResidue(prec uint8) Float32x8
-// RoundScaledResidue computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) RoundScaledResidue(prec uint8) Float32x16
+func (x Float32x16) RoundToEvenScaledResidue(prec uint8) Float32x16
-// RoundScaledResidue computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) RoundScaledResidue(prec uint8) Float64x2
+func (x Float64x2) RoundToEvenScaledResidue(prec uint8) Float64x2
-// RoundScaledResidue computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) RoundScaledResidue(prec uint8) Float64x4
+func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4
-// RoundScaledResidue computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidue computes the difference after rounding with specified precision.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) RoundScaledResidue(prec uint8) Float64x8
+func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8
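
// A sketch of the residue semantics, under the assumption (per VREDUCE) that
// the result is what the scaled rounding above discards, i.e.
// x - RoundToEvenScaled(x, prec) (assumes: import "math").
func roundToEvenScaledResidueRef(x float64, prec uint8) float64 {
	scale := math.Ldexp(1, int(prec)) // 2^prec
	return x - math.RoundToEven(x*scale)/scale
}
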
-/* RoundScaledResidueMasked */
+/* RoundToEvenScaledResidueMasked */
-// RoundScaledResidueMasked computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x4) RoundScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
+func (x Float32x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-// RoundScaledResidueMasked computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x8) RoundScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
+func (x Float32x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-// RoundScaledResidueMasked computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPS, CPU Feature: AVX512DQ
-func (x Float32x16) RoundScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
+func (x Float32x16) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-// RoundScaledResidueMasked computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x2) RoundScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
+func (x Float64x2) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-// RoundScaledResidueMasked computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x4) RoundScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
+func (x Float64x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-// RoundScaledResidueMasked computes the difference after rounding with specified precision.
+// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
//
// This operation is applied selectively under a write mask.
//
// prec is expected to be a constant, non-constant value will trigger a runtime panic.
//
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
-func (x Float64x8) RoundScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* SaturatedAddDotProd */
-
-// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
-func (x Int32x4) SaturatedAddDotProd(y Int16x8, z Int16x8) Int32x4
-
-// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
-func (x Int32x8) SaturatedAddDotProd(y Int16x16, z Int16x16) Int32x8
-
-// SaturatedAddDotProd performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedAddDotProd(y Int16x32, z Int16x32) Int32x16
-
-/* SaturatedAddDotProdMasked */
-
-// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) SaturatedAddDotProdMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
-
-// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) SaturatedAddDotProdMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
-
-// SaturatedAddDotProdMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) SaturatedAddDotProdMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
-
-/* SaturatedUnsignedSignedPairDotProd */
-
-// SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX
-func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8
-
-// SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX2
-func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16
-
-// SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32
-
-/* SaturatedUnsignedSignedPairDotProdMasked */
-
-// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, mask Mask16x8) Int16x8
-
-// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, mask Mask16x16) Int16x16
-
-// SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512BW
-func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, mask Mask16x32) Int16x32
-
-/* SaturatedUnsignedSignedQuadDotProdAccumulate */
-
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4
-
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
-
-// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
-
-/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */
-
-// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
-
-// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x32) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
-
-// SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
+func (x Float64x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
/* Scale */
// Asm: VPSRLVQ, CPU Feature: AVX512F
func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
-/* Sign */
-
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
-//
-// Asm: VPSIGNB, CPU Feature: AVX
-func (x Int8x16) Sign(y Int8x16) Int8x16
-
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
-//
-// Asm: VPSIGNB, CPU Feature: AVX2
-func (x Int8x32) Sign(y Int8x32) Int8x32
-
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
-//
-// Asm: VPSIGNW, CPU Feature: AVX
-func (x Int16x8) Sign(y Int16x8) Int16x8
-
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
-//
-// Asm: VPSIGNW, CPU Feature: AVX2
-func (x Int16x16) Sign(y Int16x16) Int16x16
-
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
-//
-// Asm: VPSIGND, CPU Feature: AVX
-func (x Int32x4) Sign(y Int32x4) Int32x4
-
-// Sign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
-//
-// Asm: VPSIGND, CPU Feature: AVX2
-func (x Int32x8) Sign(y Int32x8) Int32x8
-
/* Sqrt */
// Sqrt computes the square root of each element.
// SubSaturated subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBSB, CPU Feature: AVX
+// Asm: VPSUBUSB, CPU Feature: AVX
func (x Uint8x16) SubSaturated(y Uint8x16) Uint8x16
// SubSaturated subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBSB, CPU Feature: AVX2
+// Asm: VPSUBUSB, CPU Feature: AVX2
func (x Uint8x32) SubSaturated(y Uint8x32) Uint8x32
// SubSaturated subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
+// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x64) SubSaturated(y Uint8x64) Uint8x64
// SubSaturated subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBSW, CPU Feature: AVX
+// Asm: VPSUBUSW, CPU Feature: AVX
func (x Uint16x8) SubSaturated(y Uint16x8) Uint16x8
// SubSaturated subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBSW, CPU Feature: AVX2
+// Asm: VPSUBUSW, CPU Feature: AVX2
func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16
// SubSaturated subtracts corresponding elements of two vectors with saturation.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
+// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32
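
// A scalar sketch of unsigned saturating subtraction, the behavior the
// VPSUBUSB/VPSUBUSW mnemonics referenced above name: differences that would
// underflow clamp to 0 instead of wrapping around.
func subSaturatedUint8Ref(x, y []uint8) []uint8 {
	out := make([]uint8, len(x))
	for i := range x {
		if x[i] > y[i] {
			out[i] = x[i] - y[i]
		} // otherwise out[i] stays 0 (saturated)
	}
	return out
}
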
/* SubSaturatedMasked */
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
+// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x16) SubSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
+// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x32) SubSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSB, CPU Feature: AVX512BW
+// Asm: VPSUBUSB, CPU Feature: AVX512BW
func (x Uint8x64) SubSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
+// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x8) SubSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
+// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x16) SubSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
//
// This operation is applied selectively under a write mask.
//
-// Asm: VPSUBSW, CPU Feature: AVX512BW
+// Asm: VPSUBUSW, CPU Feature: AVX512BW
func (x Uint16x32) SubSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
/* Trunc */
// Asm: VREDUCEPD, CPU Feature: AVX512DQ
func (x Float64x8) TruncScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-/* UnsignedSignedQuadDotProdAccumulate */
-
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4) Int32x4
-
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x32) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8) Int32x8
-
-// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
-
-/* UnsignedSignedQuadDotProdAccumulateMasked */
-
-// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
-
-// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x32) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
-
-// UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
-
/* Xor */
// Xor performs a bitwise XOR operation between two vectors.
}
}
-func TestPairDotProdAccumulate(t *testing.T) {
- if !simd.HasAVX512GFNI() {
- // TODO: this function is actually VNNI, let's implement and call the right check.
- t.Skip("Test requires HasAVX512GFNI, not available on this hardware")
- return
- }
- x := simd.LoadInt16x8Slice([]int16{2, 2, 2, 2, 2, 2, 2, 2})
- z := simd.LoadInt32x4Slice([]int32{3, 3, 3, 3})
- want := []int32{11, 11, 11, 11}
- got := make([]int32, 4)
- z = z.AddDotProd(x, x)
- z.StoreSlice(got)
- for i := range 4 {
- if got[i] != want[i] {
- t.Errorf("a and b differ at index %d, got=%d, want=%d", i, got[i], want[i])
- }
- }
-}
-
var testShiftAllVal uint64 = 3
func TestShiftAll(t *testing.T) {
func TestFMA(t *testing.T) {
if simd.HasAVX512() {
- testFloat32x4TernaryFlaky(t, simd.Float32x4.FusedMultiplyAdd, fmaSlice[float32], 0.001)
- testFloat32x8TernaryFlaky(t, simd.Float32x8.FusedMultiplyAdd, fmaSlice[float32], 0.001)
- testFloat32x16TernaryFlaky(t, simd.Float32x16.FusedMultiplyAdd, fmaSlice[float32], 0.001)
- testFloat64x2Ternary(t, simd.Float64x2.FusedMultiplyAdd, fmaSlice[float64])
- testFloat64x4Ternary(t, simd.Float64x4.FusedMultiplyAdd, fmaSlice[float64])
- testFloat64x8Ternary(t, simd.Float64x8.FusedMultiplyAdd, fmaSlice[float64])
+ testFloat32x4TernaryFlaky(t, simd.Float32x4.MulAdd, fmaSlice[float32], 0.001)
+ testFloat32x8TernaryFlaky(t, simd.Float32x8.MulAdd, fmaSlice[float32], 0.001)
+ testFloat32x16TernaryFlaky(t, simd.Float32x16.MulAdd, fmaSlice[float32], 0.001)
+ testFloat64x2Ternary(t, simd.Float64x2.MulAdd, fmaSlice[float64])
+ testFloat64x4Ternary(t, simd.Float64x4.MulAdd, fmaSlice[float64])
+ testFloat64x8Ternary(t, simd.Float64x8.MulAdd, fmaSlice[float64])
}
}
}
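
// A scalar reference for the MulAdd semantics exercised in TestFMA above: a
// fused multiply-add computes x*y+z with a single rounding, which is what the
// fmaSlice helper is assumed to model (assumes: import "math").
func mulAddRef(x, y, z float64) float64 {
	return math.FMA(x, y, z)
}
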
func TestRound(t *testing.T) {
- testFloat32x4Unary(t, simd.Float32x4.Round, roundSlice[float32])
- testFloat32x8Unary(t, simd.Float32x8.Round, roundSlice[float32])
- testFloat64x2Unary(t, simd.Float64x2.Round, roundSlice[float64])
- testFloat64x4Unary(t, simd.Float64x4.Round, roundSlice[float64])
+ testFloat32x4Unary(t, simd.Float32x4.RoundToEven, roundSlice[float32])
+ testFloat32x8Unary(t, simd.Float32x8.RoundToEven, roundSlice[float32])
+ testFloat64x2Unary(t, simd.Float64x2.RoundToEven, roundSlice[float64])
+ testFloat64x4Unary(t, simd.Float64x4.RoundToEven, roundSlice[float64])
if simd.HasAVX512() {
// testFloat32x16Unary(t, simd.Float32x16.Round, roundSlice[float32]) // missing
// testFloat64x8Unary(t, simd.Float64x8.Round, roundSlice[float64]) // missing
}
func TestAbsolute(t *testing.T) {
- testInt8x16Unary(t, simd.Int8x16.Absolute, map1[int8](abs))
- testInt8x32Unary(t, simd.Int8x32.Absolute, map1[int8](abs))
- testInt16x8Unary(t, simd.Int16x8.Absolute, map1[int16](abs))
- testInt16x16Unary(t, simd.Int16x16.Absolute, map1[int16](abs))
- testInt32x4Unary(t, simd.Int32x4.Absolute, map1[int32](abs))
- testInt32x8Unary(t, simd.Int32x8.Absolute, map1[int32](abs))
+ testInt8x16Unary(t, simd.Int8x16.Abs, map1[int8](abs))
+ testInt8x32Unary(t, simd.Int8x32.Abs, map1[int8](abs))
+ testInt16x8Unary(t, simd.Int16x8.Abs, map1[int16](abs))
+ testInt16x16Unary(t, simd.Int16x16.Abs, map1[int16](abs))
+ testInt32x4Unary(t, simd.Int32x4.Abs, map1[int32](abs))
+ testInt32x8Unary(t, simd.Int32x8.Abs, map1[int32](abs))
if simd.HasAVX512() {
- testInt8x64Unary(t, simd.Int8x64.Absolute, map1[int8](abs))
- testInt16x32Unary(t, simd.Int16x32.Absolute, map1[int16](abs))
- testInt32x16Unary(t, simd.Int32x16.Absolute, map1[int32](abs))
- testInt64x2Unary(t, simd.Int64x2.Absolute, map1[int64](abs))
- testInt64x4Unary(t, simd.Int64x4.Absolute, map1[int64](abs))
- testInt64x8Unary(t, simd.Int64x8.Absolute, map1[int64](abs))
+ testInt8x64Unary(t, simd.Int8x64.Abs, map1[int8](abs))
+ testInt16x32Unary(t, simd.Int16x32.Abs, map1[int16](abs))
+ testInt32x16Unary(t, simd.Int32x16.Abs, map1[int32](abs))
+ testInt64x2Unary(t, simd.Int64x2.Abs, map1[int64](abs))
+ testInt64x4Unary(t, simd.Int64x4.Abs, map1[int64](abs))
+ testInt64x8Unary(t, simd.Int64x8.Abs, map1[int64](abs))
}
}